diff --git a/CMakeLists.txt b/CMakeLists.txt index 553da9245bc5d805b9a95cc2120d6b6783da2b30..41231f053bdf416119876f7501ffdcf9b17b4231 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -296,5 +296,6 @@ ENDIF () ADD_SUBDIRECTORY(deps) ADD_SUBDIRECTORY(src) +ADD_SUBDIRECTORY(tests) INCLUDE(CPack) diff --git a/src/client/CMakeLists.txt b/src/client/CMakeLists.txt index a64f0581fb01ee6093e3fdc284030e0303f59060..55fa45475a658105c33ae620cd8a5d922a838466 100644 --- a/src/client/CMakeLists.txt +++ b/src/client/CMakeLists.txt @@ -6,6 +6,7 @@ INCLUDE_DIRECTORIES(jni) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/util/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/query/inc) +INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/common/inc) INCLUDE_DIRECTORIES(${TD_ENTERPRISE_DIR}/src/inc) INCLUDE_DIRECTORIES(${TD_OS_DIR}/inc) AUX_SOURCE_DIRECTORY(src SRC) diff --git a/src/client/inc/tscSQLParser.h b/src/client/inc/tscSQLParser.h deleted file mode 100644 index c7f8ba06e8fc20ea4be57784861ad86b4089aad2..0000000000000000000000000000000000000000 --- a/src/client/inc/tscSQLParser.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef TDENGINE_TSQL_H -#define TDENGINE_TSQL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include "taos.h" -#include "taosmsg.h" -#include "ttokendef.h" -#include "taosdef.h" -#include "tvariant.h" -#include "qsqlparser.h" - -enum { - TSQL_NODE_TYPE_EXPR = 0x1, - TSQL_NODE_TYPE_ID = 0x2, - TSQL_NODE_TYPE_VALUE = 0x4, -}; - -#define NON_ARITHMEIC_EXPR 0 -#define NORMAL_ARITHMETIC 1 -#define AGG_ARIGHTMEIC 2 - -int32_t tSQLParse(SSqlInfo *pSQLInfo, const char *pSql); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/client/inc/tscSecondaryMerge.h b/src/client/inc/tscSecondaryMerge.h index 08d995c9f3d789a82f5b8fa1331d8653a017181b..5370d0ec5259c3e47e065d4fe8b37884bdf6a050 100644 --- a/src/client/inc/tscSecondaryMerge.h +++ b/src/client/inc/tscSecondaryMerge.h @@ -20,9 +20,9 @@ extern "C" { #endif +#include "qextbuffer.h" +#include "qinterpolation.h" #include "taosmsg.h" -#include "textbuffer.h" -#include "tinterpolation.h" #include "tlosertree.h" #include "tsclient.h" diff --git a/src/client/inc/tscCache.h b/src/client/inc/tscSubquery.h similarity index 50% rename from src/client/inc/tscCache.h rename to src/client/inc/tscSubquery.h index 4c6acec096c01db64b09c4f0d18f404b8825f7b6..f8a6fbf5b11d688b16bf7625771b1e893bed52c1 100644 --- a/src/client/inc/tscCache.h +++ b/src/client/inc/tscSubquery.h @@ -13,23 +13,33 @@ * along with this program. If not, see . 
*/ -#ifndef TDENGINE_TSCCACHE_H -#define TDENGINE_TSCCACHE_H +#ifndef TDENGINE_TSCJOINPROCESS_H +#define TDENGINE_TSCJOINPROCESS_H #ifdef __cplusplus extern "C" { #endif -void *taosOpenConnCache(int maxSessions, void (*cleanFp)(void *), void *tmrCtrl, int64_t keepTimer); +#include "tscUtil.h" +#include "tsclient.h" -void taosCloseConnCache(void *handle); +void tscFetchDatablockFromSubquery(SSqlObj* pSql); -void *taosAddConnIntoCache(void *handle, void *data, uint32_t ip, uint16_t port, char *user); +void tscSetupOutputColumnIndex(SSqlObj* pSql); +int32_t tscLaunchSecondPhaseSubqueries(SSqlObj* pSql); +void tscJoinQueryCallback(void* param, TAOS_RES* tres, int code); -void *taosGetConnFromCache(void *handle, uint32_t ip, uint16_t port, char *user); +SJoinSubquerySupporter* tscCreateJoinSupporter(SSqlObj* pSql, SSubqueryState* pState, int32_t index); +void tscDestroyJoinSupporter(SJoinSubquerySupporter* pSupporter); + +int32_t tscHandleMasterJoinQuery(SSqlObj* pSql); + +int32_t tscHandleMasterSTableQuery(SSqlObj *pSql); + +int32_t tscHandleMultivnodeInsert(SSqlObj *pSql); #ifdef __cplusplus } #endif -#endif // TDENGINE_TSCACHE_H +#endif // TDENGINE_TSCJOINPROCESS_H diff --git a/src/client/inc/tscUtil.h b/src/client/inc/tscUtil.h index a869e45198fe2da615b2178c55a32bcd64a9285a..e9d231171512c8cb9c54b0ababf386bfa821dbf2 100644 --- a/src/client/inc/tscUtil.h +++ b/src/client/inc/tscUtil.h @@ -24,16 +24,16 @@ extern "C" { * @date 2018/09/30 */ #include "os.h" -#include "textbuffer.h" +#include "qextbuffer.h" +#include "taosdef.h" #include "tscSecondaryMerge.h" #include "tsclient.h" -#include "taosdef.h" -#define UTIL_METER_IS_SUPERTABLE(metaInfo) \ - (((metaInfo)->pMeterMeta != NULL) && ((metaInfo)->pMeterMeta->tableType == TSDB_TABLE_TYPE_SUPER_TABLE)) -#define UTIL_METER_IS_NOMRAL_METER(metaInfo) (!(UTIL_METER_IS_SUPERTABLE(metaInfo))) -#define UTIL_METER_IS_CREATE_FROM_METRIC(metaInfo) \ - (((metaInfo)->pMeterMeta != NULL) && ((metaInfo)->pMeterMeta->tableType == 
TSDB_TABLE_TYPE_CHILD_TABLE)) +#define UTIL_TABLE_IS_SUPERTABLE(metaInfo) \ + (((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_SUPER_TABLE)) +#define UTIL_TABLE_IS_NOMRAL_TABLE(metaInfo) (!(UTIL_TABLE_IS_SUPERTABLE(metaInfo))) +#define UTIL_TABLE_CREATE_FROM_STABLE(metaInfo) \ + (((metaInfo)->pTableMeta != NULL) && ((metaInfo)->pTableMeta->tableType == TSDB_CHILD_TABLE)) #define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0) @@ -67,7 +67,7 @@ typedef struct SJoinSubquerySupporter { } SJoinSubquerySupporter; int32_t tscCreateDataBlock(size_t initialSize, int32_t rowSize, int32_t startOffset, const char* name, - STableMeta* pMeterMeta, STableDataBlocks** dataBlocks); + STableMeta* pTableMeta, STableDataBlocks** dataBlocks); void tscAppendDataBlock(SDataBlockList* pList, STableDataBlocks* pBlocks); void tscDestroyDataBlock(STableDataBlocks* pDataBlock); @@ -81,11 +81,11 @@ int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, STableDataBlocks* pDataBlock); void tscFreeUnusedDataBlocks(SDataBlockList* pList); int32_t tscMergeTableDataBlocks(SSqlObj* pSql, SDataBlockList* pDataList); int32_t tscGetDataBlockFromList(void* pHashList, SDataBlockList* pDataBlockList, int64_t id, int32_t size, - int32_t startOffset, int32_t rowSize, const char* tableId, STableMeta* pMeterMeta, + int32_t startOffset, int32_t rowSize, const char* tableId, STableMeta* pTableMeta, STableDataBlocks** dataBlocks); SVnodeSidList* tscGetVnodeSidList(SSuperTableMeta* pMetricmeta, int32_t vnodeIdx); -STableSidExtInfo* tscGetMeterSidInfo(SVnodeSidList* pSidList, int32_t idx); +STableIdInfo* tscGetMeterSidInfo(SVnodeSidList* pSidList, int32_t idx); /** * @@ -104,7 +104,7 @@ bool tscIsProjectionQueryOnSTable(SQueryInfo* pQueryInfo, int32_t tableIndex); bool tscProjectionQueryOnTable(SQueryInfo* pQueryInfo); -bool tscIsTwoStageMergeMetricQuery(SQueryInfo* pQueryInfo, int32_t tableIndex); +bool tscIsTwoStageSTableQuery(SQueryInfo* pQueryInfo, int32_t tableIndex); bool 
tscQueryOnMetric(SSqlCmd* pCmd); bool tscQueryMetricTags(SQueryInfo* pQueryInfo); bool tscIsSelectivityWithTagQuery(SSqlCmd* pCmd); @@ -114,7 +114,7 @@ void tscAddSpecialColumnForSelect(SQueryInfo* pQueryInfo, int32_t outputColIndex void addRequiredTagColumn(SQueryInfo* pQueryInfo, int32_t tagColIndex, int32_t tableIndex); -int32_t setMeterID(SMeterMetaInfo* pMeterMetaInfo, SSQLToken* pzTableName, SSqlObj* pSql); +int32_t setMeterID(STableMetaInfo* pTableMetaInfo, SSQLToken* pzTableName, SSqlObj* pSql); void tscClearInterpInfo(SQueryInfo* pQueryInfo); bool tscIsInsertOrImportData(char* sqlstr); @@ -173,7 +173,7 @@ int32_t tscValidateName(SSQLToken* pToken); void tscIncStreamExecutionCount(void* pStream); -bool tscValidateColumnId(SMeterMetaInfo* pMeterMetaInfo, int32_t colId); +bool tscValidateColumnId(STableMetaInfo* pTableMetaInfo, int32_t colId); // get starter position of metric query condition (query on tags) in SSqlCmd.payload SCond* tsGetMetricQueryCondPos(STagCond* pCond, uint64_t tableIndex); @@ -190,26 +190,26 @@ void tscCleanSqlCmd(SSqlCmd* pCmd); bool tscShouldFreeAsyncSqlObj(SSqlObj* pSql); void tscRemoveAllMeterMetaInfo(SQueryInfo* pQueryInfo, const char* address, bool removeFromCache); -SMeterMetaInfo* tscGetMeterMetaInfo(SSqlCmd *pCmd, int32_t subClauseIndex, int32_t tableIndex); -SMeterMetaInfo* tscGetMeterMetaInfoFromQueryInfo(SQueryInfo *pQueryInfo, int32_t tableIndex); +STableMetaInfo* tscGetTableMetaInfoFromCmd(SSqlCmd *pCmd, int32_t subClauseIndex, int32_t tableIndex); +STableMetaInfo* tscGetMetaInfo(SQueryInfo *pQueryInfo, int32_t tableIndex); SQueryInfo *tscGetQueryInfoDetail(SSqlCmd* pCmd, int32_t subClauseIndex); int32_t tscGetQueryInfoDetailSafely(SSqlCmd *pCmd, int32_t subClauseIndex, SQueryInfo** pQueryInfo); -SMeterMetaInfo* tscGetMeterMetaInfoByUid(SQueryInfo* pQueryInfo, uint64_t uid, int32_t* index); -void tscClearMeterMetaInfo(SMeterMetaInfo* pMeterMetaInfo, bool removeFromCache); +STableMetaInfo* 
tscGetMeterMetaInfoByUid(SQueryInfo* pQueryInfo, uint64_t uid, int32_t* index); +void tscClearMeterMetaInfo(STableMetaInfo* pTableMetaInfo, bool removeFromCache); -SMeterMetaInfo* tscAddMeterMetaInfo(SQueryInfo* pQueryInfo, const char* name, STableMeta* pMeterMeta, SSuperTableMeta* pMetricMeta, +STableMetaInfo* tscAddMeterMetaInfo(SQueryInfo* pQueryInfo, const char* name, STableMeta* pTableMeta, SSuperTableMeta* pMetricMeta, int16_t numOfTags, int16_t* tags); -SMeterMetaInfo* tscAddEmptyMeterMetaInfo(SQueryInfo *pQueryInfo); +STableMetaInfo* tscAddEmptyMetaInfo(SQueryInfo *pQueryInfo); int32_t tscAddSubqueryInfo(SSqlCmd *pCmd); void tscFreeSubqueryInfo(SSqlCmd* pCmd); void tscClearSubqueryInfo(SSqlCmd* pCmd); void tscGetMetricMetaCacheKey(SQueryInfo* pQueryInfo, char* keyStr, uint64_t uid); int tscGetMetricMeta(SSqlObj* pSql, int32_t clauseIndex); -int tscGetMeterMeta(SSqlObj* pSql, SMeterMetaInfo* pMeterMetaInfo); -int tscGetMeterMetaEx(SSqlObj* pSql, SMeterMetaInfo* pMeterMetaInfo, bool createIfNotExists); +int tscGetTableMeta(SSqlObj* pSql, STableMetaInfo* pTableMetaInfo); +int tscGetMeterMetaEx(SSqlObj* pSql, STableMetaInfo* pTableMetaInfo, bool createIfNotExists); void tscResetForNextRetrieve(SSqlRes* pRes); @@ -252,7 +252,6 @@ void tscTryQueryNextVnode(SSqlObj *pSql, __async_cb_func_t fp); void tscAsyncQuerySingleRowForNextVnode(void *param, TAOS_RES *tres, int numOfRows); void tscTryQueryNextClause(SSqlObj* pSql, void (*queryFp)()); - #ifdef __cplusplus } #endif diff --git a/src/util/inc/tschemautil.h b/src/client/inc/tschemautil.h similarity index 52% rename from src/util/inc/tschemautil.h rename to src/client/inc/tschemautil.h index 64bbf94f4245f7c9886dec9514ff11a101872517..2fc77d69bbe0f276aa5b963da95ef8e6c98dcdae 100644 --- a/src/util/inc/tschemautil.h +++ b/src/client/inc/tschemautil.h @@ -20,13 +20,56 @@ extern "C" { #endif -#include #include "taosmsg.h" #include "tstoken.h" +#include "tsclient.h" #define VALIDNUMOFCOLS(x) ((x) >= TSDB_MIN_COLUMNS && 
(x) <= TSDB_MAX_COLUMNS) -struct SSchema; +//struct SSchema; + +/** + * get the number of tags of this table + * @param pTableMeta + * @return + */ +int32_t tscGetNumOfTags(const STableMeta* pTableMeta); + +/** + * get the number of columns of this table + * @param pTableMeta + * @return + */ +int32_t tscGetNumOfColumns(const STableMeta* pTableMeta); + +/** + * get the basic info of this table + * @param pTableMeta + * @return + */ +STableComInfo tscGetTableInfo(const STableMeta* pTableMeta); + +/** + * get the schema + * @param pTableMeta + * @return + */ +SSchema* tscGetTableSchema(const STableMeta* pTableMeta); + +/** + * get the tag schema + * @param pMeta + * @return + */ +SSchema *tscGetTableTagSchema(const STableMeta *pMeta); + +/** + * + * @param pMeta + * @param startCol + * @return + */ +SSchema *tscGetTableColumnSchema(const STableMeta *pMeta, int32_t startCol); /** * check if the schema is valid or not, including following aspects: @@ -42,21 +85,23 @@ struct SSchema; */ bool isValidSchema(struct SSchema *pSchema, int32_t numOfCols); -struct SSchema *tsGetSchema(STableMeta *pMeta); - -struct SSchema *tsGetTagSchema(STableMeta *pMeta); +/** + * get the schema for the "tbname" column. 
it is a built column + * @return + */ +SSchema tscGetTbnameColumnSchema(); -struct SSchema *tsGetColumnSchema(STableMeta *pMeta, int32_t startCol); -struct SSchema tsGetTbnameColumnSchema(); +/** + * create the table meta from the msg + * @param pTableMetaMsg + * @param size size of the table meta + * @return + */ +STableMeta* tscCreateTableMetaFromMsg(STableMetaMsg* pTableMetaMsg, size_t* size); +//todo tags value as well as the table id structure needs refactor char *tsGetTagsValue(STableMeta *pMeta); -bool tsMeterMetaIdentical(STableMeta *p1, STableMeta *p2); - -void extractTableName(char *tableId, char *name); - -SSQLToken extractDBName(char *tableId, char *name); - void extractTableNameFromToken(SSQLToken *pToken, SSQLToken* pTable); #ifdef __cplusplus diff --git a/src/client/inc/tsclient.h b/src/client/inc/tsclient.h index caec0fdbb8c444e865610ac1e08b992decfc31b3..2dd580dda00651d8d7d4d84505147a8f464d954c 100644 --- a/src/client/inc/tsclient.h +++ b/src/client/inc/tsclient.h @@ -21,17 +21,19 @@ extern "C" { #endif #include "os.h" + +#include "qsqlparser.h" +#include "qsqltype.h" +#include "qtsbuf.h" #include "taos.h" +#include "taosdef.h" #include "taosmsg.h" #include "tglobalcfg.h" #include "tlog.h" -#include "tscCache.h" -#include "tscSQLParser.h" -#include "taosdef.h" +#include "trpc.h" #include "tsqlfunction.h" #include "tutil.h" -#include "trpc.h" -#include "qsqltype.h" +#include "tarray.h" #define TSC_GET_RESPTR_BASE(res, _queryinfo, col) (res->data + ((_queryinfo)->fieldsInfo.pSqlExpr[col]->offset) * res->numOfRows) @@ -46,8 +48,28 @@ typedef struct SSqlGroupbyExpr { int16_t orderType; // order by type: asc/desc } SSqlGroupbyExpr; -typedef struct SMeterMetaInfo { - STableMeta * pMeterMeta; // metermeta +typedef struct STableComInfo { + uint8_t numOfTags; + uint8_t precision; + int16_t numOfColumns; + int16_t rowSize; +} STableComInfo; + +typedef struct STableMeta { + //super table if it is created according to super table, otherwise, tableInfo is used + 
union { struct STableMeta* pSTable; STableComInfo tableInfo; }; + uint8_t tableType; + int8_t numOfVpeers; + int16_t sversion; + SVnodeDesc vpeerDesc[TSDB_VNODES_SUPPORT]; + int32_t vgId; // virtual group id, which current table belongs to + int32_t sid; // the index of one table in a virtual node + uint64_t uid; // unique id of a table + SSchema schema[]; // if the table is TSDB_CHILD_TABLE, schema is acquired by super table meta info +} STableMeta; + +typedef struct STableMetaInfo { + STableMeta * pTableMeta; // table meta, cached in client side and acquried by name SSuperTableMeta *pMetricMeta; // metricmeta /* @@ -55,14 +77,14 @@ typedef struct SMeterMetaInfo { * 2. keep the vnode index for multi-vnode insertion */ int32_t vnodeIndex; - char name[TSDB_TABLE_ID_LEN + 1]; // table(super table) name + char name[TSDB_TABLE_ID_LEN]; // (super) table name int16_t numOfTags; // total required tags in query, including groupby tags int16_t tagColumnIndex[TSDB_MAX_TAGS]; // clause + tag projection -} SMeterMetaInfo; +} STableMetaInfo; /* the structure for sql function in select clause */ typedef struct SSqlExpr { - char aliasName[TSDB_COL_NAME_LEN + 1]; // as aliasName + char aliasName[TSDB_COL_NAME_LEN]; // as aliasName SColIndexEx colInfo; int64_t uid; // refactor use the pointer int16_t functionId; // function id in aAgg array @@ -83,7 +105,6 @@ typedef struct SFieldInfo { int16_t numOfOutputCols; // number of column in result int16_t numOfAlloc; // allocated size TAOS_FIELD *pFields; -// short * pOffset; /* * define if this column is belong to the queried result, it may be add by parser to faciliate @@ -161,7 +182,7 @@ typedef struct STableDataBlocks { char tableId[TSDB_TABLE_ID_LEN]; int8_t tsSource; // where does the UNIX timestamp come from, server or client bool ordered; // if current rows are ordered or not - int64_t vgid; // virtual group id + int64_t vgId; // virtual group id int64_t prevTS; // previous timestamp, recorded to decide if the records array is ts 
ascending int32_t numOfTables; // number of tables in current submit block @@ -174,7 +195,7 @@ typedef struct STableDataBlocks { * the metermeta for current table, the metermeta will be used during submit stage, keep a ref * to avoid it to be removed from cache */ - STableMeta *pMeterMeta; + STableMeta *pTableMeta; union { char *filename; @@ -199,7 +220,7 @@ typedef struct SDataBlockList { typedef struct SQueryInfo { int16_t command; // the command may be different for each subclause, so keep it seperately. uint16_t type; // query/insert/import type - char intervalTimeUnit; + char slidingTimeUnit; int64_t etime, stime; int64_t intervalTime; // aggregation time interval @@ -215,7 +236,7 @@ typedef struct SQueryInfo { SOrderVal order; int16_t interpoType; // interpolate type int16_t numOfTables; - SMeterMetaInfo **pMeterInfo; + STableMetaInfo **pTableMetaInfo; struct STSBuf * tsBuf; int64_t * defaultVal; // default value for interpolation char * msg; // pointer to the pCmd->payload to keep error message temporarily @@ -238,7 +259,7 @@ typedef struct { union { bool existsCheck; // check if the table exists or not bool inStream; // denote if current sql is executed in stream or not - bool createOnDemand; // if the table is missing, on-the-fly create it. during getmeterMeta + bool autoCreated; // if the table is missing, on-the-fly create it. 
during getmeterMeta int8_t dataSourceType; // load data from file or not }; @@ -296,7 +317,7 @@ typedef struct { struct SLocalReducer *pLocalReducer; } SSqlRes; -typedef struct _tsc_obj { +typedef struct STscObj { void * signature; void * pTimer; char mgmtIp[TSDB_USER_LEN]; @@ -308,14 +329,14 @@ typedef struct _tsc_obj { char sversion[TSDB_VERSION_LEN]; char writeAuth : 1; char superAuth : 1; - struct _sql_obj *pSql; - struct _sql_obj *pHb; - struct _sql_obj *sqlList; - struct _sstream *streamList; + struct SSqlObj *pSql; + struct SSqlObj *pHb; + struct SSqlObj *sqlList; + struct SSqlStream *streamList; pthread_mutex_t mutex; } STscObj; -typedef struct _sql_obj { +typedef struct SSqlObj { void * signature; STscObj *pTscObj; void (*fp)(); @@ -340,11 +361,11 @@ typedef struct _sql_obj { uint8_t numOfSubs; char * asyncTblPos; void * pTableHashList; - struct _sql_obj **pSubs; - struct _sql_obj * prev, *next; + struct SSqlObj **pSubs; + struct SSqlObj * prev, *next; } SSqlObj; -typedef struct _sstream { +typedef struct SSqlStream { SSqlObj *pSql; uint32_t streamId; char listed; @@ -369,7 +390,7 @@ typedef struct _sstream { void *param; void (*callback)(void *); // Callback function when stream is stopped from client level - struct _sstream *prev, *next; + struct SSqlStream *prev, *next; } SSqlStream; int32_t tscInitRpc(const char *user, const char *secret); @@ -377,14 +398,12 @@ int32_t tscInitRpc(const char *user, const char *secret); // tscSql API int tsParseSql(SSqlObj *pSql, bool multiVnodeInsertion); -void tscInitMsgs(); +void tscInitMsgsFp(); extern int (*tscBuildMsg[TSDB_SQL_MAX])(SSqlObj *pSql, SSqlInfo *pInfo); void tscProcessMsgFromServer(SRpcMsg *rpcMsg); int tscProcessSql(SSqlObj *pSql); -void tscAsyncInsertMultiVnodesProxy(void *param, TAOS_RES *tres, int numOfRows); - int tscRenewMeterMeta(SSqlObj *pSql, char *tableId); void tscQueueAsyncRes(SSqlObj *pSql); @@ -442,10 +461,8 @@ char *tscGetErrorMsgPayload(SSqlCmd *pCmd); int32_t tscInvalidSQLErrMsg(char 
*msg, const char *additionalInfo, const char *sql); -// transfer SSqlInfo to SqlCmd struct -int32_t tscToSQLCmd(SSqlObj *pSql, struct SSqlInfo *pInfo); - void tscQueueAsyncFreeResult(SSqlObj *pSql); +int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo); extern void * pVnodeConn; extern void * pTscMgmtConn; @@ -453,7 +470,6 @@ extern void * tscCacheHandle; extern int32_t globalCode; extern int slaveIndex; extern void * tscTmr; -extern void * tscConnCache; extern void * tscQhandle; extern int tscKeepConn[]; extern int tsInsertHeadSize; diff --git a/src/client/src/TSDBJNIConnector.c b/src/client/src/TSDBJNIConnector.c index 228403c79d318d922f5571a9663b3c97bbffbbc8..dfaedc7c143890b7541d0eae59ece25af13730c4 100644 --- a/src/client/src/TSDBJNIConnector.c +++ b/src/client/src/TSDBJNIConnector.c @@ -13,13 +13,13 @@ * along with this program. If not, see . */ -#include "os.h" #include "com_taosdata_jdbc_TSDBJNIConnector.h" +#include "os.h" #include "taos.h" #include "tlog.h" -#include "tscJoinProcess.h" -#include "tsclient.h" +#include "tscSubquery.h" #include "tscUtil.h" +#include "tsclient.h" #include "ttime.h" int __init = 0; diff --git a/src/client/src/tcache.c b/src/client/src/tcache.c deleted file mode 100644 index e6213c45a41f104e6d948c2b46e281e9909ea2b8..0000000000000000000000000000000000000000 --- a/src/client/src/tcache.c +++ /dev/null @@ -1,947 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#include "os.h" - -#include "tcache.h" -#include "tlog.h" -#include "ttime.h" -#include "ttimer.h" -#include "tutil.h" -#include "hashutil.h" - -#define HASH_MAX_CAPACITY (1024*1024*16) -#define HASH_VALUE_IN_TRASH (-1) -#define HASH_DEFAULT_LOAD_FACTOR (0.75) -#define HASH_INDEX(v, c) ((v) & ((c)-1)) - -/** - * todo: refactor to extract the hash table out of cache structure - */ -typedef struct SCacheStatis { - int64_t missCount; - int64_t hitCount; - int64_t totalAccess; - int64_t refreshCount; - int32_t numOfCollision; - int32_t numOfResize; - int64_t resizeTime; -} SCacheStatis; - -typedef struct _cache_node_t { - char * key; // null-terminated string - struct _cache_node_t *prev; - struct _cache_node_t *next; - uint64_t addTime; // the time when this element is added or updated into cache - uint64_t time; // end time when this element should be remove from cache - uint64_t signature; - - /* - * reference count for this object - * if this value is larger than 0, this value will never be released - */ - uint32_t refCount; - uint32_t hashVal; // the hash value of key, if hashVal == HASH_VALUE_IN_TRASH, this node is moved to trash - uint32_t nodeSize; // allocated size for current SDataNode - char data[]; -} SDataNode; - -typedef uint32_t (*_hashFunc)(const char *, uint32_t); - -typedef struct { - SDataNode **hashList; - int capacity; - int size; - int64_t totalSize; // total allocated buffer in this hash table, SCacheObj is not included. - int64_t refreshTime; - - /* - * to accommodate the old datanode which has the same key value of new one in hashList - * when an new node is put into cache, if an existed one with the same key: - * 1. if the old one does not be referenced, update it. - * 2. otherwise, move the old one to pTrash, add the new one. 
- * - * when the node in pTrash does not be referenced, it will be release at the expired time - */ - SDataNode * pTrash; - void * tmrCtrl; - void * pTimer; - SCacheStatis statistics; - _hashFunc hashFp; - int numOfElemsInTrash; // number of element in trash - int16_t deleting; // set the deleting flag to stop refreshing asap. - -#if defined LINUX - pthread_rwlock_t lock; -#else - pthread_mutex_t lock; -#endif - -} SCacheObj; - -static FORCE_INLINE void __cache_wr_lock(SCacheObj *pObj) { -#if defined LINUX - pthread_rwlock_wrlock(&pObj->lock); -#else - pthread_mutex_lock(&pObj->lock); -#endif -} - -static FORCE_INLINE void __cache_rd_lock(SCacheObj *pObj) { -#if defined LINUX - pthread_rwlock_rdlock(&pObj->lock); -#else - pthread_mutex_lock(&pObj->lock); -#endif -} - -static FORCE_INLINE void __cache_unlock(SCacheObj *pObj) { -#if defined LINUX - pthread_rwlock_unlock(&pObj->lock); -#else - pthread_mutex_unlock(&pObj->lock); -#endif -} - -static FORCE_INLINE int32_t __cache_lock_init(SCacheObj *pObj) { -#if defined LINUX - return pthread_rwlock_init(&pObj->lock, NULL); -#else - return pthread_mutex_init(&pObj->lock, NULL); -#endif -} - -static FORCE_INLINE void __cache_lock_destroy(SCacheObj *pObj) { -#if defined LINUX - pthread_rwlock_destroy(&pObj->lock); -#else - pthread_mutex_destroy(&pObj->lock); -#endif -} - -static FORCE_INLINE int32_t taosHashTableLength(int32_t length) { - int32_t trueLength = MIN(length, HASH_MAX_CAPACITY); - - int32_t i = 4; - while (i < trueLength) i = (i << 1); - return i; -} - -/** - * @param key key of object for hash, usually a null-terminated string - * @param keyLen length of key - * @param pData actually data. required a consecutive memory block, no pointer is allowed - * in pData. Pointer copy causes memory access error. 
- * @param size size of block - * @param lifespan total survial time from now - * @return SDataNode - */ -static SDataNode *taosCreateHashNode(const char *key, uint32_t keyLen, const char *pData, size_t dataSize, - uint64_t lifespan) { - size_t totalSize = dataSize + sizeof(SDataNode) + keyLen; - - SDataNode *pNewNode = calloc(1, totalSize); - if (pNewNode == NULL) { - pError("failed to allocate memory, reason:%s", strerror(errno)); - return NULL; - } - - memcpy(pNewNode->data, pData, dataSize); - pNewNode->addTime = (uint64_t)taosGetTimestampMs(); - pNewNode->time = pNewNode->addTime + lifespan; - - pNewNode->key = pNewNode->data + dataSize; - strcpy(pNewNode->key, key); - - pNewNode->signature = (uint64_t)pNewNode; - pNewNode->nodeSize = (uint32_t)totalSize; - - return pNewNode; -} - -/** - * hash key function - * - * @param key key string - * @param len length of key - * @return hash value - */ -static FORCE_INLINE uint32_t taosHashKey(const char *key, uint32_t len) { return MurmurHash3_32(key, len); } - -/** - * add object node into trash, and this object is closed for referencing if it is add to trash - * It will be removed until the pNode->refCount == 0 - * @param pObj Cache object - * @param pNode Cache slot object - */ -static void taosAddToTrash(SCacheObj *pObj, SDataNode *pNode) { - if (pNode->hashVal == HASH_VALUE_IN_TRASH) { /* node is already in trash */ - return; - } - - pNode->next = pObj->pTrash; - if (pObj->pTrash) { - pObj->pTrash->prev = pNode; - } - - pNode->prev = NULL; - pObj->pTrash = pNode; - - pNode->hashVal = HASH_VALUE_IN_TRASH; - pObj->numOfElemsInTrash++; - - pTrace("key:%s %p move to trash, numOfElem in trash:%d", pNode->key, pNode, pObj->numOfElemsInTrash); -} - -static void taosRemoveFromTrash(SCacheObj *pObj, SDataNode *pNode) { - if (pNode->signature != (uint64_t)pNode) { - pError("key:sig:%d %p data has been released, ignore", pNode->signature, pNode); - return; - } - - pObj->numOfElemsInTrash--; - if (pNode->prev) { - 
pNode->prev->next = pNode->next; - } else { - /* pnode is the header, update header */ - pObj->pTrash = pNode->next; - } - - if (pNode->next) { - pNode->next->prev = pNode->prev; - } - - pNode->signature = 0; - free(pNode); -} -/** - * remove nodes in trash with refCount == 0 in cache - * @param pNode - * @param pObj - * @param force force model, if true, remove data in trash without check refcount. - * may cause corruption. So, forece model only applys before cache is closed - */ -static void taosClearCacheTrash(SCacheObj *pObj, bool force) { - __cache_wr_lock(pObj); - - if (pObj->numOfElemsInTrash == 0) { - if (pObj->pTrash != NULL) { - pError("key:inconsistency data in cache, numOfElem in trash:%d", pObj->numOfElemsInTrash); - } - pObj->pTrash = NULL; - - __cache_unlock(pObj); - return; - } - - SDataNode *pNode = pObj->pTrash; - - while (pNode) { - if (pNode->refCount < 0) { - pError("key:%s %p in trash released more than referenced, removed", pNode->key, pNode); - pNode->refCount = 0; - } - - if (pNode->next == pNode) { - pNode->next = NULL; - } - - if (force || (pNode->refCount == 0)) { - pTrace("key:%s %p removed from trash. 
numOfElem in trash:%d", pNode->key, pNode, pObj->numOfElemsInTrash - 1) - SDataNode *pTmp = pNode; - pNode = pNode->next; - taosRemoveFromTrash(pObj, pTmp); - } else { - pNode = pNode->next; - } - } - - assert(pObj->numOfElemsInTrash >= 0); - __cache_unlock(pObj); -} - -/** - * add data node into cache - * @param pObj cache object - * @param pNode Cache slot object - */ -static void taosAddNodeToHashTable(SCacheObj *pObj, SDataNode *pNode) { - int32_t slotIndex = HASH_INDEX(pNode->hashVal, pObj->capacity); - pNode->next = pObj->hashList[slotIndex]; - - if (pObj->hashList[slotIndex] != NULL) { - (pObj->hashList[slotIndex])->prev = pNode; - pObj->statistics.numOfCollision++; - } - pObj->hashList[slotIndex] = pNode; - - pObj->size++; - pObj->totalSize += pNode->nodeSize; - - pTrace("key:%s %p add to hash table", pNode->key, pNode); -} - -/** - * remove node in hash list - * @param pObj - * @param pNode - */ -static void taosRemoveNodeInHashTable(SCacheObj *pObj, SDataNode *pNode) { - if (pNode->hashVal == HASH_VALUE_IN_TRASH) return; - - SDataNode *pNext = pNode->next; - if (pNode->prev != NULL) { - pNode->prev->next = pNext; - } else { /* the node is in hashlist, remove it */ - pObj->hashList[HASH_INDEX(pNode->hashVal, pObj->capacity)] = pNext; - } - - if (pNext != NULL) { - pNext->prev = pNode->prev; - } - - pObj->size--; - pObj->totalSize -= pNode->nodeSize; - - pNode->next = NULL; - pNode->prev = NULL; - - pTrace("key:%s %p remove from hashtable", pNode->key, pNode); -} - -/** - * in-place node in hashlist - * @param pObj cache object - * @param pNode data node - */ -static void taosUpdateInHashTable(SCacheObj *pObj, SDataNode *pNode) { - assert(pNode->hashVal >= 0); - - if (pNode->prev) { - pNode->prev->next = pNode; - } else { - pObj->hashList[HASH_INDEX(pNode->hashVal, pObj->capacity)] = pNode; - } - - if (pNode->next) { - (pNode->next)->prev = pNode; - } - - pTrace("key:%s %p update hashtable", pNode->key, pNode); -} - -/** - * get SDataNode from hashlist, 
nodes from trash are not included. - * @param pObj Cache objection - * @param key key for hash - * @param keyLen key length - * @return - */ -static SDataNode *taosGetNodeFromHashTable(SCacheObj *pObj, const char *key, uint32_t keyLen) { - uint32_t hash = (*pObj->hashFp)(key, keyLen); - - int32_t slot = HASH_INDEX(hash, pObj->capacity); - SDataNode *pNode = pObj->hashList[slot]; - - while (pNode) { - if (strcmp(pNode->key, key) == 0) break; - - pNode = pNode->next; - } - - if (pNode) { - assert(HASH_INDEX(pNode->hashVal, pObj->capacity) == slot); - } - - return pNode; -} - -/** - * resize the hash list if the threshold is reached - * - * @param pObj - */ -static void taosHashTableResize(SCacheObj *pObj) { - if (pObj->size < pObj->capacity * HASH_DEFAULT_LOAD_FACTOR) { - return; - } - - // double the original capacity - pObj->statistics.numOfResize++; - SDataNode *pNode = NULL; - SDataNode *pNext = NULL; - - int32_t newSize = pObj->capacity << 1; - if (newSize > HASH_MAX_CAPACITY) { - pTrace("current capacity:%d, maximum capacity:%d, no resize applied due to limitation is reached", - pObj->capacity, HASH_MAX_CAPACITY); - return; - } - - int64_t st = taosGetTimestampUs(); - SDataNode **pList = realloc(pObj->hashList, sizeof(SDataNode *) * newSize); - if (pList == NULL) { - pTrace("cache resize failed due to out of memory, capacity remain:%d", pObj->capacity); - return; - } - - pObj->hashList = pList; - - int32_t inc = newSize - pObj->capacity; - memset(&pObj->hashList[pObj->capacity], 0, inc * sizeof(SDataNode *)); - - pObj->capacity = newSize; - - for (int32_t i = 0; i < pObj->capacity; ++i) { - pNode = pObj->hashList[i]; - - while (pNode) { - int32_t j = HASH_INDEX(pNode->hashVal, pObj->capacity); - if (j == i) { // this key resides in the same slot, no need to relocate it - pNode = pNode->next; - } else { - pNext = pNode->next; - - // remove from current slot - if (pNode->prev != NULL) { - pNode->prev->next = pNode->next; - } else { - pObj->hashList[i] = 
pNode->next; - } - - if (pNode->next != NULL) { - (pNode->next)->prev = pNode->prev; - } - - // added into new slot - pNode->next = NULL; - pNode->prev = NULL; - - pNode->next = pObj->hashList[j]; - - if (pObj->hashList[j] != NULL) { - (pObj->hashList[j])->prev = pNode; - } - pObj->hashList[j] = pNode; - - // continue - pNode = pNext; - } - } - } - - int64_t et = taosGetTimestampUs(); - pObj->statistics.resizeTime += (et - st); - - pTrace("cache resize completed, new capacity:%d, load factor:%f, elapsed time:%fms", pObj->capacity, - ((double)pObj->size) / pObj->capacity, (et - st) / 1000.0); -} - -/** - * release node - * @param pObj cache object - * @param pNode data node - */ -static FORCE_INLINE void taosCacheReleaseNode(SCacheObj *pObj, SDataNode *pNode) { - taosRemoveNodeInHashTable(pObj, pNode); - if (pNode->signature != (uint64_t)pNode) { - pError("key:%s, %p data is invalid, or has been released", pNode->key, pNode); - return; - } - - pTrace("key:%s is removed from cache,total:%d,size:%ldbytes", pNode->key, pObj->size, pObj->totalSize); - pNode->signature = 0; - free(pNode); -} - -/** - * move the old node into trash - * @param pObj - * @param pNode - */ -static FORCE_INLINE void taosCacheMoveNodeToTrash(SCacheObj *pObj, SDataNode *pNode) { - taosRemoveNodeInHashTable(pObj, pNode); - taosAddToTrash(pObj, pNode); -} - -/** - * update data in cache - * @param pObj - * @param pNode - * @param key - * @param keyLen - * @param pData - * @param dataSize - * @return - */ -static SDataNode *taosUpdateCacheImpl(SCacheObj *pObj, SDataNode *pNode, char *key, int32_t keyLen, void *pData, - uint32_t dataSize, uint64_t keepTime) { - SDataNode *pNewNode = NULL; - - // only a node is not referenced by any other object, in-place update it - if (pNode->refCount == 0) { - size_t newSize = sizeof(SDataNode) + dataSize + keyLen; - - pNewNode = (SDataNode *)realloc(pNode, newSize); - if (pNewNode == NULL) { - return NULL; - } - - pNewNode->signature = (uint64_t)pNewNode; - 
memcpy(pNewNode->data, pData, dataSize); - - pNewNode->key = pNewNode->data + dataSize; - strcpy(pNewNode->key, key); - - // update the timestamp information for updated key/value - pNewNode->addTime = taosGetTimestampMs(); - pNewNode->time = pNewNode->addTime + keepTime; - - atomic_add_fetch_32(&pNewNode->refCount, 1); - - // the address of this node may be changed, so the prev and next element should update the corresponding pointer - taosUpdateInHashTable(pObj, pNewNode); - } else { - int32_t hashVal = pNode->hashVal; - taosCacheMoveNodeToTrash(pObj, pNode); - - pNewNode = taosCreateHashNode(key, keyLen, pData, dataSize, keepTime); - if (pNewNode == NULL) { - return NULL; - } - - atomic_add_fetch_32(&pNewNode->refCount, 1); - - assert(hashVal == (*pObj->hashFp)(key, keyLen - 1)); - pNewNode->hashVal = hashVal; - - // add new element to hashtable - taosAddNodeToHashTable(pObj, pNewNode); - } - - return pNewNode; -} - -/** - * add data into hash table - * @param key - * @param pData - * @param size - * @param pObj - * @param keyLen - * @param pNode - * @return - */ -static FORCE_INLINE SDataNode *taosAddToCacheImpl(SCacheObj *pObj, char *key, uint32_t keyLen, const char *pData, - int dataSize, uint64_t lifespan) { - SDataNode *pNode = taosCreateHashNode(key, keyLen, pData, dataSize, lifespan); - if (pNode == NULL) { - return NULL; - } - - atomic_add_fetch_32(&pNode->refCount, 1); - pNode->hashVal = (*pObj->hashFp)(key, keyLen - 1); - taosAddNodeToHashTable(pObj, pNode); - - return pNode; -} - -/** - * add data into cache - * - * @param handle cache object - * @param key key - * @param pData cached data - * @param dataSize data size - * @param keepTime survival time in second - * @return cached element - */ -void *taosAddDataIntoCache(void *handle, char *key, char *pData, int dataSize, int keepTime) { - SDataNode *pNode; - SCacheObj *pObj; - - pObj = (SCacheObj *)handle; - if (pObj == NULL || pObj->capacity == 0) return NULL; - - uint32_t keyLen = 
(uint32_t)strlen(key) + 1; - - __cache_wr_lock(pObj); - - SDataNode *pOldNode = taosGetNodeFromHashTable(pObj, key, keyLen - 1); - - if (pOldNode == NULL) { // do add to cache - // check if the threshold is reached - taosHashTableResize(pObj); - - pNode = taosAddToCacheImpl(pObj, key, keyLen, pData, dataSize, keepTime * 1000L); - if (NULL != pNode) { - pTrace( - "key:%s %p added into cache, slot:%d, addTime:%" PRIu64 ", expireTime:%" PRIu64 ", cache total:%d, " - "size:%" PRId64 " bytes, collision:%d", - pNode->key, pNode, HASH_INDEX(pNode->hashVal, pObj->capacity), pNode->addTime, pNode->time, pObj->size, - pObj->totalSize, pObj->statistics.numOfCollision); - } - } else { // old data exists, update the node - pNode = taosUpdateCacheImpl(pObj, pOldNode, key, keyLen, pData, dataSize, keepTime * 1000L); - pTrace("key:%s %p exist in cache, updated", key, pNode); - } - - __cache_unlock(pObj); - - return (pNode != NULL) ? pNode->data : NULL; -} - -static FORCE_INLINE void taosDecRef(SDataNode *pNode) { - if (pNode == NULL) { - return; - } - - if (pNode->refCount > 0) { - atomic_sub_fetch_32(&pNode->refCount, 1); - pTrace("key:%s is released by app.refcnt:%d", pNode->key, pNode->refCount); - } else { - /* - * safety check. - * app may false releases cached object twice, to decrease the refcount more than acquired - */ - pError("key:%s is released by app more than referenced.refcnt:%d", pNode->key, pNode->refCount); - } -} - -/** - * remove data in cache, the data will not be removed immediately. 
- * if it is referenced by other object, it will be remain in cache - * @param handle - * @param data - */ -void taosRemoveDataFromCache(void *handle, void **data, bool _remove) { - SCacheObj *pObj = (SCacheObj *)handle; - if (pObj == NULL || pObj->capacity == 0 || (*data) == NULL || (pObj->size + pObj->numOfElemsInTrash == 0)) return; - - size_t offset = offsetof(SDataNode, data); - SDataNode *pNode = (SDataNode *)((char *)(*data) - offset); - - if (pNode->signature != (uint64_t)pNode) { - pError("key: %p release invalid cache data", pNode); - return; - } - - *data = NULL; - - if (_remove) { - __cache_wr_lock(pObj); - // pNode may be released immediately by other thread after the reference count of pNode is set to 0, - // So we need to lock it in the first place. - taosDecRef(pNode); - taosCacheMoveNodeToTrash(pObj, pNode); - - __cache_unlock(pObj); - } else { - taosDecRef(pNode); - } -} - -/** - * get data from cache - * @param handle cache object - * @param key key - * @return cached data or NULL - */ -void *taosGetDataFromCache(void *handle, char *key) { - SCacheObj *pObj = (SCacheObj *)handle; - if (pObj == NULL || pObj->capacity == 0) return NULL; - - uint32_t keyLen = (uint32_t)strlen(key); - - __cache_rd_lock(pObj); - - SDataNode *ptNode = taosGetNodeFromHashTable(handle, key, keyLen); - if (ptNode != NULL) { - atomic_add_fetch_32(&ptNode->refCount, 1); - } - - __cache_unlock(pObj); - - if (ptNode != NULL) { - atomic_add_fetch_32(&pObj->statistics.hitCount, 1); - pTrace("key:%s is retrieved from cache,refcnt:%d", key, ptNode->refCount); - } else { - atomic_add_fetch_32(&pObj->statistics.missCount, 1); - pTrace("key:%s not in cache,retrieved failed", key); - } - - atomic_add_fetch_32(&pObj->statistics.totalAccess, 1); - return (ptNode != NULL) ? 
ptNode->data : NULL; -} - -/** - * update data in cache - * @param handle hash object handle(pointer) - * @param key key for hash - * @param pData actually data - * @param size length of data - * @return new referenced data - */ -void *taosUpdateDataFromCache(void *handle, char *key, char *pData, int size, int duration) { - SCacheObj *pObj = (SCacheObj *)handle; - if (pObj == NULL || pObj->capacity == 0) return NULL; - - SDataNode *pNew = NULL; - - uint32_t keyLen = strlen(key) + 1; - - __cache_wr_lock(pObj); - - SDataNode *pNode = taosGetNodeFromHashTable(handle, key, keyLen - 1); - - if (pNode == NULL) { // object has been released, do add operation - pNew = taosAddToCacheImpl(pObj, key, keyLen, pData, size, duration * 1000L); - pWarn("key:%s does not exist, update failed,do add to cache.total:%d,size:%ldbytes", key, pObj->size, - pObj->totalSize); - } else { - pNew = taosUpdateCacheImpl(pObj, pNode, key, keyLen, pData, size, duration * 1000L); - pTrace("key:%s updated.expireTime:%" PRIu64 ".refCnt:%d", key, pNode->time, pNode->refCount); - } - - __cache_unlock(pObj); - return (pNew != NULL) ? pNew->data : NULL; -} - -static void doCleanUpDataCache(SCacheObj* pObj) { - SDataNode *pNode, *pNext; - - __cache_wr_lock(pObj); - - if (pObj->hashList && pObj->size > 0) { - for (int i = 0; i < pObj->capacity; ++i) { - pNode = pObj->hashList[i]; - while (pNode) { - pNext = pNode->next; - free(pNode); - pNode = pNext; - } - } - - tfree(pObj->hashList); - } - - __cache_unlock(pObj); - - taosClearCacheTrash(pObj, true); - __cache_lock_destroy(pObj); - - memset(pObj, 0, sizeof(SCacheObj)); - - free(pObj); -} - -/** - * refresh cache to remove data in both hash list and trash, if any nodes' refcount == 0, every pObj->refreshTime - * @param handle Cache object handle - */ -void taosRefreshDataCache(void *handle, void *tmrId) { - SDataNode *pNode, *pNext; - SCacheObj *pObj = (SCacheObj *)handle; - - if (pObj == NULL || pObj->capacity <= 0) { - pTrace("object is destroyed. 
no refresh retry"); - return; - } - - if (pObj->deleting == 1) { - doCleanUpDataCache(pObj); - return; - } - - uint64_t time = taosGetTimestampMs(); - uint32_t numOfCheck = 0; - pObj->statistics.refreshCount++; - - int32_t num = pObj->size; - - for (int i = 0; i < pObj->capacity; ++i) { - // in deleting process, quit refreshing immediately - if (pObj->deleting == 1) { - break; - } - - __cache_wr_lock(pObj); - pNode = pObj->hashList[i]; - - while (pNode) { - numOfCheck++; - pNext = pNode->next; - - if (pNode->time <= time && pNode->refCount <= 0) { - taosCacheReleaseNode(pObj, pNode); - } - pNode = pNext; - } - - /* all data have been checked, not need to iterate further */ - if (numOfCheck == num || pObj->size <= 0) { - __cache_unlock(pObj); - break; - } - - __cache_unlock(pObj); - } - - if (pObj->deleting == 1) { // clean up resources and abort - doCleanUpDataCache(pObj); - } else { - taosClearCacheTrash(pObj, false); - taosTmrReset(taosRefreshDataCache, pObj->refreshTime, pObj, pObj->tmrCtrl, &pObj->pTimer); - } -} - -/** - * - * @param handle - * @param tmrId - */ -void taosClearDataCache(void *handle) { - SDataNode *pNode, *pNext; - SCacheObj *pObj = (SCacheObj *)handle; - - int32_t capacity = pObj->capacity; - - for (int i = 0; i < capacity; ++i) { - __cache_wr_lock(pObj); - - pNode = pObj->hashList[i]; - - while (pNode) { - pNext = pNode->next; - taosCacheMoveNodeToTrash(pObj, pNode); - pNode = pNext; - } - - pObj->hashList[i] = NULL; - - __cache_unlock(pObj); - } - - taosClearCacheTrash(pObj, false); -} - -/** - * @param capacity maximum slots available for hash elements - * @param tmrCtrl timer ctrl - * @param refreshTime refresh operation interval time, the maximum survival time when one element is expired and - * not referenced by other objects - * @return - */ -void *taosInitDataCache(int capacity, void *tmrCtrl, int64_t refreshTime) { - if (tmrCtrl == NULL || refreshTime <= 0 || capacity <= 0) { - return NULL; - } - - SCacheObj *pObj = (SCacheObj 
*)calloc(1, sizeof(SCacheObj)); - if (pObj == NULL) { - pError("failed to allocate memory, reason:%s", strerror(errno)); - return NULL; - } - - // the max slots is not defined by user - pObj->capacity = taosHashTableLength(capacity); - assert((pObj->capacity & (pObj->capacity - 1)) == 0); - - pObj->hashFp = taosHashKey; - pObj->refreshTime = refreshTime * 1000; - - pObj->hashList = (SDataNode **)calloc(1, sizeof(SDataNode *) * pObj->capacity); - if (pObj->hashList == NULL) { - free(pObj); - pError("failed to allocate memory, reason:%s", strerror(errno)); - return NULL; - } - - pObj->tmrCtrl = tmrCtrl; - taosTmrReset(taosRefreshDataCache, pObj->refreshTime, pObj, pObj->tmrCtrl, &pObj->pTimer); - - if (__cache_lock_init(pObj) != 0) { - taosTmrStopA(&pObj->pTimer); - free(pObj->hashList); - free(pObj); - - pError("failed to init lock, reason:%s", strerror(errno)); - return NULL; - } - - return (void *)pObj; -} - -/** - * release all allocated memory and destroy the cache object. - * - * This function only set the deleting flag, and the specific work of clean up cache is delegated to - * taosRefreshDataCache function, which will executed every SCacheObj->refreshTime sec. - * - * If the value of SCacheObj->refreshTime is too large, the taosRefreshDataCache function may not be invoked - * before the main thread terminated, in which case all allocated resources are simply recycled by OS. 
- * - * @param handle - */ -void taosCleanUpDataCache(void *handle) { - SCacheObj *pObj = (SCacheObj *)handle; - if (pObj == NULL) { - return; - } - - pObj->deleting = 1; -} - -void* taosGetDataFromExists(void* handle, void* data) { - SCacheObj *pObj = (SCacheObj *)handle; - if (pObj == NULL || data == NULL) return NULL; - - size_t offset = offsetof(SDataNode, data); - SDataNode *ptNode = (SDataNode *)((char *)data - offset); - - if (ptNode->signature != (uint64_t) ptNode) { - pError("key: %p the data from cache is invalid", ptNode); - return NULL; - } - - int32_t ref = atomic_add_fetch_32(&ptNode->refCount, 1); - pTrace("%p add ref data in cache, refCnt:%d", data, ref) - - // the data if referenced by at least one object, so the reference count must be greater than the value of 2. - assert(ref >= 2); - return data; -} - -void* taosTransferDataInCache(void* handle, void** data) { - SCacheObj *pObj = (SCacheObj *)handle; - if (pObj == NULL || data == NULL) return NULL; - - size_t offset = offsetof(SDataNode, data); - SDataNode *ptNode = (SDataNode *)((char *)(*data) - offset); - - if (ptNode->signature != (uint64_t) ptNode) { - pError("key: %p the data from cache is invalid", ptNode); - return NULL; - } - - assert(ptNode->refCount >= 1); - - char* d = *data; - - // clear its reference to old area - *data = NULL; - - return d; -} diff --git a/src/client/src/tscAsync.c b/src/client/src/tscAsync.c index f1630ef294cd33fb80d096892120fe9f162203bc..2bab6e03fe0ab4fd746d4c246299252b4f85fe44 100644 --- a/src/client/src/tscAsync.c +++ b/src/client/src/tscAsync.c @@ -22,10 +22,10 @@ #include "tscUtil.h" #include "tsclient.h" #include "tsocket.h" -#include "tscSQLParser.h" #include "tutil.h" #include "tnote.h" #include "tsched.h" +#include "tschemautil.h" static void tscProcessFetchRow(SSchedMsg *pMsg); static void tscAsyncQueryRowsForNextVnode(void *param, TAOS_RES *tres, int numOfRows); @@ -84,7 +84,7 @@ void doAsyncQuery(STscObj* pObj, SSqlObj* pSql, void (*fp)(), void* 
param, const } // TODO return the correct error code to client in tscQueueAsyncError -void taos_query_a(TAOS *taos, const char *sqlstr, void (*fp)(void *, TAOS_RES *, int), void *param) { +void taos_query_a(TAOS *taos, const char *sqlstr, __async_cb_func_t fp, void *param) { STscObj *pObj = (STscObj *)taos; if (pObj == NULL || pObj->signature != pObj) { tscError("bug!!! pObj:%p", pObj); @@ -397,51 +397,9 @@ void tscQueueAsyncFreeResult(SSqlObj *pSql) { taosScheduleTask(tscQhandle, &schedMsg); } -void tscAsyncInsertMultiVnodesProxy(void *param, TAOS_RES *tres, int numOfRows) { - SSqlObj *pSql = (SSqlObj *)param; - SSqlCmd *pCmd = &pSql->cmd; - int32_t code = TSDB_CODE_SUCCESS; - - assert(pCmd->dataSourceType != 0 && pSql->signature == pSql); - - int32_t index = 0; - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, index); - - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - assert(pQueryInfo->numOfTables == 1 || pQueryInfo->numOfTables == 2); - - SDataBlockList *pDataBlocks = pCmd->pDataBlocks; - if (pDataBlocks == NULL || pMeterMetaInfo->vnodeIndex >= pDataBlocks->nSize) { - // restore user defined fp - pSql->fp = pSql->fetchFp; - tscTrace("%p Async insertion completed, destroy data block list", pSql); - - // release data block data - pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); - - // all data has been sent to vnode, call user function - (*pSql->fp)(pSql->param, tres, numOfRows); - } else { - do { - code = tscCopyDataBlockToPayload(pSql, pDataBlocks->pData[pMeterMetaInfo->vnodeIndex++]); - if (code != TSDB_CODE_SUCCESS) { - tscTrace("%p prepare submit data block failed in async insertion, vnodeIdx:%d, total:%d, code:%d", - pSql, pMeterMetaInfo->vnodeIndex - 1, pDataBlocks->nSize, code); - } - - } while (code != TSDB_CODE_SUCCESS && pMeterMetaInfo->vnodeIndex < pDataBlocks->nSize); - - // build submit msg may fail - if (code == TSDB_CODE_SUCCESS) { - tscTrace("%p async insertion, vnodeIdx:%d, total:%d", 
pSql, pMeterMetaInfo->vnodeIndex - 1, pDataBlocks->nSize); - tscProcessSql(pSql); - } - } -} - int tscSendMsgToServer(SSqlObj *pSql); -void tscMeterMetaCallBack(void *param, TAOS_RES *res, int code) { +void tscTableMetaCallBack(void *param, TAOS_RES *res, int code) { SSqlObj *pSql = (SSqlObj *)param; if (pSql == NULL || pSql->signature != pSql) return; @@ -465,10 +423,10 @@ void tscMeterMetaCallBack(void *param, TAOS_RES *res, int code) { tscTrace("%p renew tableMeta successfully, command:%d, code:%d, retry:%d", pSql, pSql->cmd.command, pSql->res.code, pSql->retry); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); - assert(pMeterMetaInfo->pMeterMeta == NULL); + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); + assert(pTableMetaInfo->pTableMeta == NULL); - tscGetMeterMeta(pSql, pMeterMetaInfo); + tscGetTableMeta(pSql, pTableMetaInfo); code = tscSendMsgToServer(pSql); if (code != 0) { pRes->code = code; @@ -490,18 +448,18 @@ void tscMeterMetaCallBack(void *param, TAOS_RES *res, int code) { SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); if ((pQueryInfo->type & TSDB_QUERY_TYPE_STABLE_SUBQUERY) == TSDB_QUERY_TYPE_STABLE_SUBQUERY) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - assert(pMeterMetaInfo->pMeterMeta->numOfTags != 0 && pMeterMetaInfo->vnodeIndex >= 0 && pSql->param != NULL); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + assert((tscGetNumOfTags(pTableMetaInfo->pTableMeta) != 0) && pTableMetaInfo->vnodeIndex >= 0 && pSql->param != NULL); SRetrieveSupport *trs = (SRetrieveSupport *)pSql->param; SSqlObj * pParObj = trs->pParentSqlObj; - assert(pParObj->signature == pParObj && trs->subqueryIndex == pMeterMetaInfo->vnodeIndex && - pMeterMetaInfo->pMeterMeta->numOfTags != 0); + assert(pParObj->signature == pParObj && trs->subqueryIndex == pTableMetaInfo->vnodeIndex && + tscGetNumOfTags(pTableMetaInfo->pTableMeta) != 0); 
tscTrace("%p get metricMeta during super table query successfully", pSql); - code = tscGetMeterMeta(pSql, pMeterMetaInfo); + code = tscGetTableMeta(pSql, pTableMetaInfo); pRes->code = code; if (code == TSDB_CODE_ACTION_IN_PROGRESS) return; @@ -514,11 +472,11 @@ void tscMeterMetaCallBack(void *param, TAOS_RES *res, int code) { if (pCmd->isParseFinish) { tscTrace("%p resend data to vnode in metermeta callback since sql has been parsed completed", pSql); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - code = tscGetMeterMeta(pSql, pMeterMetaInfo); + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + code = tscGetTableMeta(pSql, pTableMetaInfo); assert(code == TSDB_CODE_SUCCESS); - if (pMeterMetaInfo->pMeterMeta) { + if (pTableMetaInfo->pTableMeta) { code = tscSendMsgToServer(pSql); if (code == TSDB_CODE_SUCCESS) return; } @@ -529,13 +487,13 @@ void tscMeterMetaCallBack(void *param, TAOS_RES *res, int code) { } } else { // stream computing - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - code = tscGetMeterMeta(pSql, pMeterMetaInfo); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + code = tscGetTableMeta(pSql, pTableMetaInfo); pRes->code = code; if (code == TSDB_CODE_ACTION_IN_PROGRESS) return; - if (code == TSDB_CODE_SUCCESS && UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (code == TSDB_CODE_SUCCESS && UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { code = tscGetMetricMeta(pSql, pCmd->clauseIndex); pRes->code = code; diff --git a/src/client/src/tscCache.c b/src/client/src/tscCache.c deleted file mode 100644 index 666d069a58c936e9028b46f9e6244923ac4be993..0000000000000000000000000000000000000000 --- a/src/client/src/tscCache.c +++ /dev/null @@ -1,264 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "os.h" - -#include "tglobalcfg.h" -#include "tlog.h" -#include "tmempool.h" -#include "tsclient.h" -#include "ttime.h" -#include "ttimer.h" -#include "tutil.h" - -typedef struct _c_hash_t { - uint32_t ip; - uint16_t port; - struct _c_hash_t *prev; - struct _c_hash_t *next; - void * data; - uint64_t time; -} SConnHash; - -typedef struct { - SConnHash ** connHashList; - mpool_h connHashMemPool; - int maxSessions; - int total; - int * count; - int64_t keepTimer; - pthread_mutex_t mutex; - void (*cleanFp)(void *); - void *tmrCtrl; - void *pTimer; -} SConnCache; - -int taosHashConn(void *handle, uint32_t ip, uint16_t port, char *user) { - SConnCache *pObj = (SConnCache *)handle; - int hash = 0; - // size_t user_len = strlen(user); - - hash = ip >> 16; - hash += (unsigned short)(ip & 0xFFFF); - hash += port; - while (*user != '\0') { - hash += *user; - user++; - } - - hash = hash % pObj->maxSessions; - - return hash; -} - -void taosRemoveExpiredNodes(SConnCache *pObj, SConnHash *pNode, int hash, uint64_t time) { - if (pNode == NULL) return; - if (time < pObj->keepTimer + pNode->time) return; - - SConnHash *pPrev = pNode->prev, *pNext; - - while (pNode) { - (*pObj->cleanFp)(pNode->data); - pNext = pNode->next; - pObj->total--; - pObj->count[hash]--; - tscTrace("%p ip:0x%x:%hu:%d:%p removed, connections in cache:%d", pNode->data, pNode->ip, pNode->port, hash, pNode, - pObj->count[hash]); - 
taosMemPoolFree(pObj->connHashMemPool, (char *)pNode); - pNode = pNext; - } - - if (pPrev) - pPrev->next = NULL; - else - pObj->connHashList[hash] = NULL; -} - -void *taosAddConnIntoCache(void *handle, void *data, uint32_t ip, uint16_t port, char *user) { - int hash; - SConnHash * pNode; - SConnCache *pObj; - - uint64_t time = taosGetTimestampMs(); - - pObj = (SConnCache *)handle; - if (pObj == NULL || pObj->maxSessions == 0) return NULL; - - if (data == NULL) { - tscTrace("data:%p ip:%p:%d not valid, not added in cache", data, ip, port); - return NULL; - } - - hash = taosHashConn(pObj, ip, port, user); - pNode = (SConnHash *)taosMemPoolMalloc(pObj->connHashMemPool); - pNode->ip = ip; - pNode->port = port; - pNode->data = data; - pNode->prev = NULL; - pNode->time = time; - - pthread_mutex_lock(&pObj->mutex); - - pNode->next = pObj->connHashList[hash]; - if (pObj->connHashList[hash] != NULL) (pObj->connHashList[hash])->prev = pNode; - pObj->connHashList[hash] = pNode; - - pObj->total++; - pObj->count[hash]++; - taosRemoveExpiredNodes(pObj, pNode->next, hash, time); - - pthread_mutex_unlock(&pObj->mutex); - - tscTrace("%p ip:0x%x:%hu:%d:%p added, connections in cache:%d", data, ip, port, hash, pNode, pObj->count[hash]); - - return pObj; -} - -void taosCleanConnCache(void *handle, void *tmrId) { - int hash; - SConnHash * pNode; - SConnCache *pObj; - - pObj = (SConnCache *)handle; - if (pObj == NULL || pObj->maxSessions == 0) return; - if (pObj->pTimer != tmrId) return; - - uint64_t time = taosGetTimestampMs(); - - for (hash = 0; hash < pObj->maxSessions; ++hash) { - pthread_mutex_lock(&pObj->mutex); - pNode = pObj->connHashList[hash]; - taosRemoveExpiredNodes(pObj, pNode, hash, time); - pthread_mutex_unlock(&pObj->mutex); - } - - // tscTrace("timer, total connections in cache:%d", pObj->total); - taosTmrReset(taosCleanConnCache, pObj->keepTimer * 2, pObj, pObj->tmrCtrl, &pObj->pTimer); -} - -void *taosGetConnFromCache(void *handle, uint32_t ip, uint16_t port, char 
*user) { - int hash; - SConnHash * pNode; - SConnCache *pObj; - void * pData = NULL; - - pObj = (SConnCache *)handle; - if (pObj == NULL || pObj->maxSessions == 0) return NULL; - - uint64_t time = taosGetTimestampMs(); - - hash = taosHashConn(pObj, ip, port, user); - pthread_mutex_lock(&pObj->mutex); - - pNode = pObj->connHashList[hash]; - while (pNode) { - if (time >= pObj->keepTimer + pNode->time) { - taosRemoveExpiredNodes(pObj, pNode, hash, time); - pNode = NULL; - break; - } - - if (pNode->ip == ip && pNode->port == port) break; - - pNode = pNode->next; - } - - if (pNode) { - taosRemoveExpiredNodes(pObj, pNode->next, hash, time); - - if (pNode->prev) { - pNode->prev->next = pNode->next; - } else { - pObj->connHashList[hash] = pNode->next; - } - - if (pNode->next) { - pNode->next->prev = pNode->prev; - } - - pData = pNode->data; - taosMemPoolFree(pObj->connHashMemPool, (char *)pNode); - pObj->total--; - pObj->count[hash]--; - } - - pthread_mutex_unlock(&pObj->mutex); - - if (pData) { - tscTrace("%p ip:0x%x:%hu:%d:%p retrieved, connections in cache:%d", pData, ip, port, hash, pNode, pObj->count[hash]); - } - - return pData; -} - -void *taosOpenConnCache(int maxSessions, void (*cleanFp)(void *), void *tmrCtrl, int64_t keepTimer) { - SConnHash **connHashList; - mpool_h connHashMemPool; - SConnCache *pObj; - - connHashMemPool = taosMemPoolInit(maxSessions, sizeof(SConnHash)); - if (connHashMemPool == 0) return NULL; - - connHashList = calloc(sizeof(SConnHash *), maxSessions); - if (connHashList == 0) { - taosMemPoolCleanUp(connHashMemPool); - return NULL; - } - - pObj = malloc(sizeof(SConnCache)); - if (pObj == NULL) { - taosMemPoolCleanUp(connHashMemPool); - free(connHashList); - return NULL; - } - memset(pObj, 0, sizeof(SConnCache)); - - pObj->count = calloc(sizeof(int), maxSessions); - pObj->total = 0; - pObj->keepTimer = keepTimer; - pObj->maxSessions = maxSessions; - pObj->connHashMemPool = connHashMemPool; - pObj->connHashList = connHashList; - pObj->cleanFp 
= cleanFp; - pObj->tmrCtrl = tmrCtrl; - taosTmrReset(taosCleanConnCache, pObj->keepTimer * 2, pObj, pObj->tmrCtrl, &pObj->pTimer); - - pthread_mutex_init(&pObj->mutex, NULL); - - return pObj; -} - -void taosCloseConnCache(void *handle) { - SConnCache *pObj; - - pObj = (SConnCache *)handle; - if (pObj == NULL || pObj->maxSessions == 0) return; - - pthread_mutex_lock(&pObj->mutex); - - taosTmrStopA(&(pObj->pTimer)); - - if (pObj->connHashMemPool) taosMemPoolCleanUp(pObj->connHashMemPool); - - tfree(pObj->connHashList); - tfree(pObj->count) - - pthread_mutex_unlock(&pObj->mutex); - - pthread_mutex_destroy(&pObj->mutex); - - memset(pObj, 0, sizeof(SConnCache)); - free(pObj); -} diff --git a/src/client/src/tscFunctionImpl.c b/src/client/src/tscFunctionImpl.c index 9c4f7e9c55b315263edaa4bbcadef11b6089593f..689f9715d08b4569817d16daca224c2482c7643c 100644 --- a/src/client/src/tscFunctionImpl.c +++ b/src/client/src/tscFunctionImpl.c @@ -14,20 +14,21 @@ */ #include "os.h" +#include "qast.h" +#include "qextbuffer.h" +#include "qhistogram.h" +#include "qinterpolation.h" +#include "qpercentile.h" +#include "qsyntaxtreefunction.h" +#include "qtsbuf.h" +#include "taosdef.h" #include "taosmsg.h" -#include "tast.h" -#include "textbuffer.h" -#include "thistogram.h" -#include "tinterpolation.h" #include "tlog.h" -#include "tscJoinProcess.h" -#include "tscSyntaxtreefunction.h" +#include "tscSubquery.h" #include "tscompression.h" #include "tsqlfunction.h" #include "ttime.h" -#include "taosdef.h" #include "tutil.h" -#include "tpercentile.h" #define GET_INPUT_CHAR(x) (((char *)((x)->aInputElemBuf)) + ((x)->startOffset) * ((x)->inputBytes)) #define GET_INPUT_CHAR_INDEX(x, y) (GET_INPUT_CHAR(x) + (y) * (x)->inputBytes) @@ -157,7 +158,7 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI pError("Illegal data type %d or data type length %d", dataType, dataBytes); return TSDB_CODE_INVALID_SQL; } - + if (functionId == TSDB_FUNC_TS || functionId == 
TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_PRJ || functionId == TSDB_FUNC_TAGPRJ || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_INTERP) { @@ -166,47 +167,47 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI *intermediateResBytes = *bytes + sizeof(SResultInfo); return TSDB_CODE_SUCCESS; } - + if (functionId == TSDB_FUNC_COUNT) { *type = TSDB_DATA_TYPE_BIGINT; *bytes = sizeof(int64_t); *intermediateResBytes = *bytes; return TSDB_CODE_SUCCESS; } - + if (functionId == TSDB_FUNC_ARITHM) { *type = TSDB_DATA_TYPE_DOUBLE; *bytes = sizeof(double); *intermediateResBytes = *bytes; return TSDB_CODE_SUCCESS; } - + if (functionId == TSDB_FUNC_TS_COMP) { *type = TSDB_DATA_TYPE_BINARY; *bytes = sizeof(int32_t); // this results is compressed ts data *intermediateResBytes = POINTER_BYTES; return TSDB_CODE_SUCCESS; } - + if (isSuperTable) { if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) { *type = TSDB_DATA_TYPE_BINARY; *bytes = dataBytes + DATA_SET_FLAG_SIZE; *intermediateResBytes = *bytes; - + return TSDB_CODE_SUCCESS; } else if (functionId == TSDB_FUNC_SUM) { *type = TSDB_DATA_TYPE_BINARY; *bytes = sizeof(SSumInfo); *intermediateResBytes = *bytes; - + return TSDB_CODE_SUCCESS; } else if (functionId == TSDB_FUNC_AVG) { *type = TSDB_DATA_TYPE_BINARY; *bytes = sizeof(SAvgInfo); *intermediateResBytes = *bytes; return TSDB_CODE_SUCCESS; - + } else if (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE) { *type = TSDB_DATA_TYPE_DOUBLE; *bytes = sizeof(SRateInfo); @@ -216,25 +217,25 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI *type = TSDB_DATA_TYPE_BINARY; *bytes = sizeof(STopBotInfo) + (sizeof(tValuePair) + POINTER_BYTES + extLength) * param; *intermediateResBytes = *bytes; - + return TSDB_CODE_SUCCESS; } else if (functionId == TSDB_FUNC_SPREAD) { *type = TSDB_DATA_TYPE_BINARY; *bytes = 
sizeof(SSpreadInfo); *intermediateResBytes = *bytes; - + return TSDB_CODE_SUCCESS; } else if (functionId == TSDB_FUNC_APERCT) { *type = TSDB_DATA_TYPE_BINARY; *bytes = sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1) + sizeof(SHistogramInfo) + sizeof(SAPercentileInfo); *intermediateResBytes = *bytes; - + return TSDB_CODE_SUCCESS; } else if (functionId == TSDB_FUNC_LAST_ROW) { *type = TSDB_DATA_TYPE_BINARY; *bytes = sizeof(SLastrowInfo) + dataBytes; *intermediateResBytes = *bytes; - + return TSDB_CODE_SUCCESS; } else if (functionId == TSDB_FUNC_TWA) { *type = TSDB_DATA_TYPE_DOUBLE; @@ -243,14 +244,14 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI return TSDB_CODE_SUCCESS; } } - + if (functionId == TSDB_FUNC_SUM) { if (dataType >= TSDB_DATA_TYPE_TINYINT && dataType <= TSDB_DATA_TYPE_BIGINT) { *type = TSDB_DATA_TYPE_BIGINT; } else { *type = TSDB_DATA_TYPE_DOUBLE; } - + *bytes = sizeof(int64_t); *intermediateResBytes = sizeof(SSumInfo); return TSDB_CODE_SUCCESS; @@ -266,7 +267,7 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI *intermediateResBytes = sizeof(STwaInfo); return TSDB_CODE_SUCCESS; } - + if (functionId == TSDB_FUNC_AVG) { *type = TSDB_DATA_TYPE_DOUBLE; *bytes = sizeof(double); @@ -306,9 +307,9 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) { *type = (int16_t)dataType; *bytes = (int16_t)dataBytes; - + size_t size = sizeof(STopBotInfo) + (sizeof(tValuePair) + POINTER_BYTES + extLength) * param; - + // the output column may be larger than sizeof(STopBotInfo) *intermediateResBytes = size; } else if (functionId == TSDB_FUNC_LAST_ROW) { @@ -318,7 +319,7 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI } else { return TSDB_CODE_INVALID_SQL; } - + return TSDB_CODE_SUCCESS; } @@ -334,20 +335,20 @@ void resetResultInfo(SResultInfo *pResInfo) { 
pResInfo->initialized = false; } void initResultInfo(SResultInfo *pResInfo) { pResInfo->initialized = true; // the this struct has been initialized flag - + pResInfo->complete = false; pResInfo->hasResult = false; pResInfo->numOfRes = 0; - + memset(pResInfo->interResultBuf, 0, (size_t)pResInfo->bufLen); } void setResultInfoBuf(SResultInfo *pResInfo, int32_t size, bool superTable) { assert(pResInfo->interResultBuf == NULL); - + pResInfo->bufLen = size; pResInfo->superTableQ = superTable; - + pResInfo->interResultBuf = calloc(1, (size_t)size); } @@ -362,9 +363,9 @@ static bool function_setup(SQLFunctionCtx *pCtx) { if (pResInfo->initialized) { return false; } - + memset(pCtx->aOutputBuf, 0, (size_t)pCtx->outputBytes); - + initResultInfo(pResInfo); return true; } @@ -378,7 +379,7 @@ static bool function_setup(SQLFunctionCtx *pCtx) { */ static void function_finalizer(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + if (pResInfo->hasResult != DATA_SET_FLAG) { pTrace("no result generated, result is set to NULL"); setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); @@ -387,50 +388,43 @@ static void function_finalizer(SQLFunctionCtx *pCtx) { doFinalizer(pCtx); } +static bool usePreVal(SQLFunctionCtx *pCtx) { + return pCtx->preAggVals.isSet && pCtx->size == pCtx->preAggVals.size; +} + /* * count function does need the finalize, if data is missing, the default value, which is 0, is used * count function does not use the pCtx->interResBuf to keep the intermediate buffer */ static void count_function(SQLFunctionCtx *pCtx) { int32_t numOfElem = 0; - - if (IS_DATA_BLOCK_LOADED(pCtx->blockStatus)) { - /* - * In following cases, the data block is loaded: - * 1. A first/last file block for a query - * 2. Required to handle other queries, such as apercentile/twa/stddev etc. - * 3. A cache block - */ + + /* + * 1. column data missing (schema modified) causes pCtx->hasNull == true. pCtx->preAggVals.isSet == true; + * 2. 
for general non-primary key columns, pCtx->hasNull may be true or false, pCtx->preAggVals.isSet == true; + * 3. for primary key column, pCtx->hasNull always be false, pCtx->preAggVals.isSet == false; + */ + if (usePreVal(pCtx)) { + numOfElem = pCtx->size - pCtx->preAggVals.statis.numOfNull; + } else { if (pCtx->hasNull) { for (int32_t i = 0; i < pCtx->size; ++i) { char *val = GET_INPUT_CHAR_INDEX(pCtx, i); if (isNull(val, pCtx->inputType)) { continue; } - + numOfElem += 1; } } else { numOfElem = pCtx->size; } - } else { - /* - * 1. column data missing (schema modified) causes pCtx->hasNull == true. pCtx->preAggVals.isSet == true; - * 2. for general non-primary key columns, pCtx->hasNull may be true or false, pCtx->preAggVals.isSet == true; - * 3. for primary key column, pCtx->hasNull always be false, pCtx->preAggVals.isSet == false; - */ - if (pCtx->preAggVals.isSet) { - numOfElem = pCtx->size - pCtx->preAggVals.numOfNull; - } else { - assert(pCtx->hasNull == false); - numOfElem = pCtx->size; - } } - + if (numOfElem > 0) { GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; } - + *((int64_t *)pCtx->aOutputBuf) += numOfElem; SET_VAL(pCtx, numOfElem, 1); } @@ -440,11 +434,11 @@ static void count_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); - + *((int64_t *)pCtx->aOutputBuf) += 1; - + // do not need it actually SResultInfo *pInfo = GET_RES_INFO(pCtx); pInfo->hasResult = DATA_SET_FLAG; @@ -455,7 +449,7 @@ static void count_func_merge(SQLFunctionCtx *pCtx) { for (int32_t i = 0; i < pCtx->size; ++i) { *((int64_t *)pCtx->aOutputBuf) += pData[i]; } - + SET_VAL(pCtx, pCtx->size, 1); } @@ -468,7 +462,7 @@ static void count_func_merge(SQLFunctionCtx *pCtx) { * @param filterCols * @return */ -int32_t count_load_data_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, int32_t blockStatus) { +int32_t count_load_data_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t 
colId) { if (colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { return BLK_DATA_NO_NEEDED; } else { @@ -476,7 +470,7 @@ int32_t count_load_data_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32 } } -int32_t no_data_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, int32_t blockStatus) { +int32_t no_data_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId) { return BLK_DATA_NO_NEEDED; } @@ -529,26 +523,26 @@ do { \ static void do_sum(SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; - + // Only the pre-computing information loaded and actual data does not loaded - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus) && pCtx->preAggVals.isSet) { - notNullElems = pCtx->size - pCtx->preAggVals.numOfNull; - assert(pCtx->size >= pCtx->preAggVals.numOfNull); - + if (pCtx->preAggVals.isSet && pCtx->preAggVals.size == pCtx->size) { + notNullElems = pCtx->size - pCtx->preAggVals.statis.numOfNull; + assert(pCtx->size >= pCtx->preAggVals.statis.numOfNull); + if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { int64_t *retVal = (int64_t*) pCtx->aOutputBuf; - *retVal += pCtx->preAggVals.sum; + *retVal += pCtx->preAggVals.statis.sum; } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE || pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { double *retVal = (double*) pCtx->aOutputBuf; - *retVal += GET_DOUBLE_VAL(&(pCtx->preAggVals.sum)); + *retVal += GET_DOUBLE_VAL(&(pCtx->preAggVals.statis.sum)); } } else { // computing based on the true data block void *pData = GET_INPUT_CHAR(pCtx); notNullElems = 0; - + if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { int64_t *retVal = (int64_t*) pCtx->aOutputBuf; - + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { LIST_ADD_N(*retVal, pCtx, pData, int8_t, notNullElems, pCtx->inputType); } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { @@ -566,10 +560,10 @@ static void do_sum(SQLFunctionCtx *pCtx) { LIST_ADD_N(*retVal, pCtx, pData, float, 
notNullElems, pCtx->inputType); } } - + // data in the check operation are all null, not output SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; } @@ -580,10 +574,10 @@ static void do_sum_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); int64_t *res = (int64_t*) pCtx->aOutputBuf; - + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { *res += GET_INT8_VAL(pData); } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { @@ -599,13 +593,13 @@ static void do_sum_f(SQLFunctionCtx *pCtx, int32_t index) { double *retVal = (double*) pCtx->aOutputBuf; *retVal += GET_FLOAT_VAL(pData); } - + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; } static void sum_function(SQLFunctionCtx *pCtx) { do_sum(pCtx); - + // keep the result data in output buffer, not in the intermediate buffer SResultInfo *pResInfo = GET_RES_INFO(pCtx); if (pResInfo->hasResult == DATA_SET_FLAG && pResInfo->superTableQ) { @@ -617,7 +611,7 @@ static void sum_function(SQLFunctionCtx *pCtx) { static void sum_function_f(SQLFunctionCtx *pCtx, int32_t index) { do_sum_f(pCtx, index); - + // keep the result data in output buffer, not in the intermediate buffer SResultInfo *pResInfo = GET_RES_INFO(pCtx); if (pResInfo->hasResult == DATA_SET_FLAG && pResInfo->superTableQ) { @@ -628,20 +622,20 @@ static void sum_function_f(SQLFunctionCtx *pCtx, int32_t index) { static int32_t sum_merge_impl(const SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; - + GET_TRUE_DATA_TYPE(); SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ); - + for (int32_t i = 0; i < pCtx->size; ++i) { char * input = GET_INPUT_CHAR_INDEX(pCtx, i); SSumInfo *pInput = (SSumInfo *)input; if (pInput->hasResult != DATA_SET_FLAG) { continue; } - + notNullElems++; - + switch (type) { case TSDB_DATA_TYPE_TINYINT: case TSDB_DATA_TYPE_SMALLINT: @@ -656,16 +650,16 @@ static int32_t sum_merge_impl(const 
SQLFunctionCtx *pCtx) { } } } - + return notNullElems; } static void sum_func_merge(SQLFunctionCtx *pCtx) { int32_t notNullElems = sum_merge_impl(pCtx); - + SET_VAL(pCtx, notNullElems, 1); SSumInfo *pSumInfo = (SSumInfo *)pCtx->aOutputBuf; - + if (notNullElems > 0) { // pCtx->numOfIteratedElems += notNullElems; pSumInfo->hasResult = DATA_SET_FLAG; @@ -674,29 +668,29 @@ static void sum_func_merge(SQLFunctionCtx *pCtx) { static void sum_func_second_merge(SQLFunctionCtx *pCtx) { int32_t notNullElems = sum_merge_impl(pCtx); - + SET_VAL(pCtx, notNullElems, 1); SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + if (notNullElems > 0) { pResInfo->hasResult = DATA_SET_FLAG; } } -static int32_t precal_req_load_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, int32_t blockStatus) { +static int32_t precal_req_load_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId) { return BLK_DATA_FILEDS_NEEDED; } -static int32_t data_req_load_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, int32_t blockStatus) { +static int32_t data_req_load_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId) { return BLK_DATA_ALL_NEEDED; } // todo: if column in current data block are null, opt for this case -static int32_t first_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, int32_t blockStatus) { +static int32_t first_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId) { if (pCtx->order == TSQL_SO_DESC) { return BLK_DATA_NO_NEEDED; } - + // no result for first query, data block is required if (GET_RES_INFO(pCtx)->numOfRes <= 0) { return BLK_DATA_ALL_NEEDED; @@ -705,11 +699,11 @@ static int32_t first_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, } } -static int32_t last_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, int32_t blockStatus) { +static int32_t last_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId) { if (pCtx->order == 
TSQL_SO_ASC) { return BLK_DATA_NO_NEEDED; } - + if (GET_RES_INFO(pCtx)->numOfRes <= 0) { return BLK_DATA_ALL_NEEDED; } else { @@ -717,12 +711,11 @@ static int32_t last_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, } } -static int32_t first_dist_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, - int32_t blockStatus) { +static int32_t first_dist_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId) { if (pCtx->order == TSQL_SO_DESC) { return BLK_DATA_NO_NEEDED; } - + // result buffer has not been set yet. return BLK_DATA_ALL_NEEDED; //todo optimize the filter info @@ -734,12 +727,11 @@ static int32_t first_dist_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY // } } -static int32_t last_dist_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId, - int32_t blockStatus) { +static int32_t last_dist_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY end, int32_t colId) { if (pCtx->order == TSQL_SO_ASC) { return BLK_DATA_NO_NEEDED; } - + return BLK_DATA_ALL_NEEDED; // SFirstLastInfo *pInfo = (SFirstLastInfo*) (pCtx->aOutputBuf + pCtx->inputBytes); // if (pInfo->hasResult != DATA_SET_FLAG) { @@ -757,26 +749,26 @@ static int32_t last_dist_data_req_info(SQLFunctionCtx *pCtx, TSKEY start, TSKEY */ static void avg_function(SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; - + // NOTE: keep the intermediate result into the interResultBuf SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + SAvgInfo *pAvgInfo = (SAvgInfo *)pResInfo->interResultBuf; double * pVal = &pAvgInfo->sum; - - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus) && pCtx->preAggVals.isSet) { + + if (usePreVal(pCtx)) { // Pre-aggregation - notNullElems = pCtx->size - pCtx->preAggVals.numOfNull; + notNullElems = pCtx->size - pCtx->preAggVals.statis.numOfNull; assert(notNullElems >= 0); - + if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { - *pVal += pCtx->preAggVals.sum; + *pVal += 
pCtx->preAggVals.statis.sum; } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE || pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { - *pVal += GET_DOUBLE_VAL(&(pCtx->preAggVals.sum)); + *pVal += GET_DOUBLE_VAL(&(pCtx->preAggVals.statis.sum)); } } else { void *pData = GET_INPUT_CHAR(pCtx); - + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { LIST_ADD_N(*pVal, pCtx, pData, int8_t, notNullElems, pCtx->inputType); } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { @@ -791,18 +783,18 @@ static void avg_function(SQLFunctionCtx *pCtx) { LIST_ADD_N(*pVal, pCtx, pData, float, notNullElems, pCtx->inputType); } } - + if (!pCtx->hasNull) { assert(notNullElems == pCtx->size); } - + SET_VAL(pCtx, notNullElems, 1); pAvgInfo->num += notNullElems; - + if (notNullElems > 0) { pResInfo->hasResult = DATA_SET_FLAG; } - + // keep the data into the final output buffer for super table query since this execution may be the last one if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SAvgInfo)); @@ -814,14 +806,14 @@ static void avg_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); - + // NOTE: keep the intermediate result into the interResultBuf SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + SAvgInfo *pAvgInfo = (SAvgInfo *)pResInfo->interResultBuf; - + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { pAvgInfo->sum += GET_INT8_VAL(pData); } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { @@ -835,13 +827,13 @@ static void avg_function_f(SQLFunctionCtx *pCtx, int32_t index) { } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { pAvgInfo->sum += GET_FLOAT_VAL(pData); } - + // restore sum and count of elements pAvgInfo->num += 1; - + // set has result flag pResInfo->hasResult = DATA_SET_FLAG; - + // keep the data into the final output buffer for super table query since this execution may be the last one if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, 
pResInfo->interResultBuf, sizeof(SAvgInfo)); @@ -851,20 +843,20 @@ static void avg_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void avg_func_merge(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ); - + SAvgInfo *pAvgInfo = (SAvgInfo *)pResInfo->interResultBuf; char * input = GET_INPUT_CHAR(pCtx); - + for (int32_t i = 0; i < pCtx->size; ++i, input += pCtx->inputBytes) { SAvgInfo *pInput = (SAvgInfo *)input; if (pInput->num == 0) { // current buffer is null continue; } - + pAvgInfo->sum += pInput->sum; pAvgInfo->num += pInput->num; } - + // if the data set hasResult is not set, the result is null if (pAvgInfo->num > 0) { pResInfo->hasResult = DATA_SET_FLAG; @@ -874,18 +866,18 @@ static void avg_func_merge(SQLFunctionCtx *pCtx) { static void avg_func_second_merge(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + double *sum = (double*) pCtx->aOutputBuf; char * input = GET_INPUT_CHAR(pCtx); - + for (int32_t i = 0; i < pCtx->size; ++i, input += pCtx->inputBytes) { SAvgInfo *pInput = (SAvgInfo *)input; if (pInput->num == 0) { // current input is null continue; } - + *sum += pInput->sum; - + // keep the number of data into the temp buffer *(int64_t *)pResInfo->interResultBuf += pInput->num; } @@ -896,29 +888,29 @@ static void avg_func_second_merge(SQLFunctionCtx *pCtx) { */ static void avg_finalizer(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + if (pCtx->currentStage == SECONDARY_STAGE_MERGE) { assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); - + if (GET_INT64_VAL(pResInfo->interResultBuf) <= 0) { setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); return; // empty table } - + *(double *)pCtx->aOutputBuf = (*(double *)pCtx->aOutputBuf) / *(int64_t *)pResInfo->interResultBuf; } else { // this is the secondary merge, only in the secondary merge, the input type is TSDB_DATA_TYPE_BINARY assert(pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= 
TSDB_DATA_TYPE_DOUBLE); - + SAvgInfo *pAvgInfo = (SAvgInfo *)pResInfo->interResultBuf; - + if (pAvgInfo->num == 0) { // all data are NULL or empty table setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); return; } - + *(double *)pCtx->aOutputBuf = pAvgInfo->sum / pAvgInfo->num; } - + // cannot set the numOfIteratedElems again since it is set during previous iteration GET_RES_INFO(pCtx)->numOfRes = 1; doFinalizer(pCtx); @@ -927,22 +919,22 @@ static void avg_finalizer(SQLFunctionCtx *pCtx) { ///////////////////////////////////////////////////////////////////////////////////////////// static void minMax_function(SQLFunctionCtx *pCtx, char *pOutput, int32_t isMin, int32_t *notNullElems) { - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus) && pCtx->preAggVals.isSet) { - // data in current data block are qualified to the query - *notNullElems = pCtx->size - pCtx->preAggVals.numOfNull; + // data in current data block are qualified to the query + if (usePreVal(pCtx)) { + *notNullElems = pCtx->size - pCtx->preAggVals.statis.numOfNull; assert(*notNullElems >= 0); - + void * tval = NULL; int16_t index = 0; - + if (isMin) { - tval = &pCtx->preAggVals.min; - index = pCtx->preAggVals.minIndex; + tval = &pCtx->preAggVals.statis.min; + index = pCtx->preAggVals.statis.minIndex; } else { - tval = &pCtx->preAggVals.max; - index = pCtx->preAggVals.maxIndex; + tval = &pCtx->preAggVals.statis.max; + index = pCtx->preAggVals.statis.maxIndex; } - + /** * NOTE: work around the bug caused by invalid pre-calculated function. * Here the selectivity + ts will not return correct value. 
@@ -954,23 +946,23 @@ static void minMax_function(SQLFunctionCtx *pCtx, char *pOutput, int32_t isMin, } TSKEY key = pCtx->ptsList[index]; - + if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { int64_t val = GET_INT64_VAL(tval); if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { int8_t *data = (int8_t *)pOutput; - + UPDATE_DATA(pCtx, *data, val, notNullElems, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { int16_t *data = (int16_t *)pOutput; - + UPDATE_DATA(pCtx, *data, val, notNullElems, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { int32_t *data = (int32_t *)pOutput; #if defined(_DEBUG_VIEW) pTrace("max value updated according to pre-cal:%d", *data); #endif - + if ((*data < val) ^ isMin) { *data = val; for (int32_t i = 0; i < (pCtx)->tagInfo.numOfTagCols; ++i) { @@ -978,7 +970,7 @@ static void minMax_function(SQLFunctionCtx *pCtx, char *pOutput, int32_t isMin, if (__ctx->functionId == TSDB_FUNC_TS_DUMMY) { __ctx->tag = (tVariant){.i64Key = key, .nType = TSDB_DATA_TYPE_BIGINT}; } - + aAggs[TSDB_FUNC_TAG].xFunction(__ctx); } } @@ -989,21 +981,21 @@ static void minMax_function(SQLFunctionCtx *pCtx, char *pOutput, int32_t isMin, } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { double *data = (double *)pOutput; double val = GET_DOUBLE_VAL(tval); - + UPDATE_DATA(pCtx, *data, val, notNullElems, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { float *data = (float *)pOutput; double val = GET_DOUBLE_VAL(tval); - + UPDATE_DATA(pCtx, *data, val, notNullElems, isMin, key); } - + return; } - + void *p = GET_INPUT_CHAR(pCtx); *notNullElems = 0; - + if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { TYPED_LOOPCHECK_N(int8_t, pOutput, p, pCtx, pCtx->inputType, isMin, *notNullElems); @@ -1012,19 +1004,19 @@ static void minMax_function(SQLFunctionCtx *pCtx, char *pOutput, int32_t isMin, } 
else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { int32_t *pData = p; int32_t *retVal = (int32_t*) pOutput; - + for (int32_t i = 0; i < pCtx->size; ++i) { if (pCtx->hasNull && isNull((const char*)&pData[i], pCtx->inputType)) { continue; } - + if ((*retVal < pData[i]) ^ isMin) { *retVal = pData[i]; TSKEY k = pCtx->ptsList[i]; - + DO_UPDATE_TAG_COLUMNS(pCtx, k); } - + *notNullElems += 1; } #if defined(_DEBUG_VIEW) @@ -1044,9 +1036,9 @@ static bool min_func_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; // not initialized since it has been initialized } - + GET_TRUE_DATA_TYPE(); - + switch (type) { case TSDB_DATA_TYPE_INT: *((int32_t *)pCtx->aOutputBuf) = INT32_MAX; @@ -1069,7 +1061,7 @@ static bool min_func_setup(SQLFunctionCtx *pCtx) { default: pError("illegal data type:%d in min/max query", pCtx->inputType); } - + return true; } @@ -1077,9 +1069,9 @@ static bool max_func_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; // not initialized since it has been initialized } - + GET_TRUE_DATA_TYPE(); - + switch (type) { case TSDB_DATA_TYPE_INT: *((int32_t *)pCtx->aOutputBuf) = INT32_MIN; @@ -1102,7 +1094,7 @@ static bool max_func_setup(SQLFunctionCtx *pCtx) { default: pError("illegal data type:%d in min/max query", pCtx->inputType); } - + return true; } @@ -1112,13 +1104,13 @@ static bool max_func_setup(SQLFunctionCtx *pCtx) { static void min_function(SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; minMax_function(pCtx, pCtx->aOutputBuf, 1, ¬NullElems); - + SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; - + // set the flag for super table query if (pResInfo->superTableQ) { *(pCtx->aOutputBuf + pCtx->inputBytes) = DATA_SET_FLAG; @@ -1129,13 +1121,13 @@ static void min_function(SQLFunctionCtx *pCtx) { static void max_function(SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; minMax_function(pCtx, pCtx->aOutputBuf, 0, ¬NullElems); 
- + SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; - + // set the flag for super table query if (pResInfo->superTableQ) { *(pCtx->aOutputBuf + pCtx->inputBytes) = DATA_SET_FLAG; @@ -1145,18 +1137,18 @@ static void max_function(SQLFunctionCtx *pCtx) { static int32_t minmax_merge_impl(SQLFunctionCtx *pCtx, int32_t bytes, char *output, bool isMin) { int32_t notNullElems = 0; - + GET_TRUE_DATA_TYPE(); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ); - + for (int32_t i = 0; i < pCtx->size; ++i) { char *input = GET_INPUT_CHAR_INDEX(pCtx, i); if (input[bytes] != DATA_SET_FLAG) { continue; } - + switch (type) { case TSDB_DATA_TYPE_TINYINT: { int8_t v = GET_INT8_VAL(input); @@ -1172,12 +1164,12 @@ static int32_t minmax_merge_impl(SQLFunctionCtx *pCtx, int32_t bytes, char *outp int32_t v = GET_INT32_VAL(input); if ((*(int32_t *)output < v) ^ isMin) { *(int32_t *)output = v; - + for (int32_t i = 0; i < pCtx->tagInfo.numOfTagCols; ++i) { SQLFunctionCtx *__ctx = pCtx->tagInfo.pTagCtxList[i]; aAggs[TSDB_FUNC_TAG].xFunction(__ctx); } - + notNullElems++; } break; @@ -1201,15 +1193,15 @@ static int32_t minmax_merge_impl(SQLFunctionCtx *pCtx, int32_t bytes, char *outp break; } } - + return notNullElems; } static void min_func_merge(SQLFunctionCtx *pCtx) { int32_t notNullElems = minmax_merge_impl(pCtx, pCtx->inputBytes, pCtx->aOutputBuf, 1); - + SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { // for super table query, SResultInfo is not used char *flag = pCtx->aOutputBuf + pCtx->inputBytes; *flag = DATA_SET_FLAG; @@ -1218,9 +1210,9 @@ static void min_func_merge(SQLFunctionCtx *pCtx) { static void min_func_second_merge(SQLFunctionCtx *pCtx) { int32_t notNullElems = minmax_merge_impl(pCtx, pCtx->outputBytes, pCtx->aOutputBuf, 1); - + SET_VAL(pCtx, notNullElems, 1); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); if (notNullElems > 0) { 
pResInfo->hasResult = DATA_SET_FLAG; @@ -1229,7 +1221,7 @@ static void min_func_second_merge(SQLFunctionCtx *pCtx) { static void max_func_merge(SQLFunctionCtx *pCtx) { int32_t numOfElems = minmax_merge_impl(pCtx, pCtx->inputBytes, pCtx->aOutputBuf, 0); - + SET_VAL(pCtx, numOfElems, 1); if (numOfElems > 0) { char *flag = pCtx->aOutputBuf + pCtx->inputBytes; @@ -1239,9 +1231,9 @@ static void max_func_merge(SQLFunctionCtx *pCtx) { static void max_func_second_merge(SQLFunctionCtx *pCtx) { int32_t numOfElem = minmax_merge_impl(pCtx, pCtx->outputBytes, pCtx->aOutputBuf, 0); - + SET_VAL(pCtx, numOfElem, 1); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); if (numOfElem > 0) { pResInfo->hasResult = DATA_SET_FLAG; @@ -1251,40 +1243,40 @@ static void max_func_second_merge(SQLFunctionCtx *pCtx) { static void minMax_function_f(SQLFunctionCtx *pCtx, int32_t index, int32_t isMin) { char *pData = GET_INPUT_CHAR_INDEX(pCtx, index); TSKEY key = pCtx->ptsList[index]; - + int32_t num = 0; if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { int8_t *output = (int8_t *)pCtx->aOutputBuf; int8_t i = GET_INT8_VAL(pData); - + UPDATE_DATA(pCtx, *output, i, num, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { int16_t *output = (int16_t*) pCtx->aOutputBuf; int16_t i = GET_INT16_VAL(pData); - + UPDATE_DATA(pCtx, *output, i, num, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_INT) { int32_t *output = (int32_t*) pCtx->aOutputBuf; int32_t i = GET_INT32_VAL(pData); - + UPDATE_DATA(pCtx, *output, i, num, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_BIGINT) { int64_t *output = (int64_t*) pCtx->aOutputBuf; int64_t i = GET_INT64_VAL(pData); - + UPDATE_DATA(pCtx, *output, i, num, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { float *output = (float*) pCtx->aOutputBuf; float i = GET_FLOAT_VAL(pData); - + UPDATE_DATA(pCtx, *output, i, num, isMin, key); } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { double *output = (double*) 
pCtx->aOutputBuf; double i = GET_DOUBLE_VAL(pData); - + UPDATE_DATA(pCtx, *output, i, num, isMin, key); } - + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; } @@ -1293,10 +1285,10 @@ static void max_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); minMax_function_f(pCtx, index, 0); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); if (pResInfo->hasResult == DATA_SET_FLAG) { char *flag = pCtx->aOutputBuf + pCtx->inputBytes; @@ -1309,10 +1301,10 @@ static void min_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); minMax_function_f(pCtx, index, 1); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); if (pResInfo->hasResult == DATA_SET_FLAG) { char *flag = pCtx->aOutputBuf + pCtx->inputBytes; @@ -1331,15 +1323,15 @@ static void min_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void stddev_function(SQLFunctionCtx *pCtx) { // the second stage to calculate standard deviation SStddevInfo *pStd = GET_RES_INFO(pCtx)->interResultBuf; - + if (pStd->stage == 0) { // the first stage is to calculate average value avg_function(pCtx); } else { double *retVal = &pStd->res; double avg = pStd->avg; - + void *pData = GET_INPUT_CHAR(pCtx); - + switch (pCtx->inputType) { case TSDB_DATA_TYPE_INT: { for (int32_t i = 0; i < pCtx->size; ++i) { @@ -1373,7 +1365,7 @@ static void stddev_function(SQLFunctionCtx *pCtx) { default: pError("stddev function not support data type:%d", pCtx->inputType); } - + // TODO get the correct data SET_VAL(pCtx, 1, 1); } @@ -1383,18 +1375,18 @@ static void stddev_function_f(SQLFunctionCtx *pCtx, int32_t index) { // the second stage to calculate standard deviation SResultInfo *pResInfo = GET_RES_INFO(pCtx); SStddevInfo *pStd = pResInfo->interResultBuf; - + /* the first stage is to calculate average value */ if (pStd->stage == 0) { avg_function_f(pCtx, index); } else { double avg = 
pStd->avg; void * pData = GET_INPUT_CHAR_INDEX(pCtx, index); - + if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + switch (pCtx->inputType) { case TSDB_DATA_TYPE_INT: { pStd->res += POW2(GET_INT32_VAL(pData) - avg); @@ -1423,7 +1415,7 @@ static void stddev_function_f(SQLFunctionCtx *pCtx, int32_t index) { default: pError("stddev function not support data type:%d", pCtx->inputType); } - + SET_VAL(pCtx, 1, 1); } } @@ -1435,7 +1427,7 @@ static void stddev_next_step(SQLFunctionCtx *pCtx) { */ SResultInfo *pResInfo = GET_RES_INFO(pCtx); SStddevInfo *pStd = pResInfo->interResultBuf; - + if (pStd->stage == 0) { /* * stddev is calculated in two stage: @@ -1445,12 +1437,12 @@ static void stddev_next_step(SQLFunctionCtx *pCtx) { */ pStd->stage++; avg_finalizer(pCtx); - + pResInfo->initialized = true; // set it initialized to avoid re-initialization // save average value into tmpBuf, for second stage scan SAvgInfo *pAvg = pResInfo->interResultBuf; - + pStd->avg = GET_DOUBLE_VAL(pCtx->aOutputBuf); assert((isnan(pAvg->sum) && pAvg->num == 0) || (pStd->num == pAvg->num && pStd->avg == pAvg->sum)); } else { @@ -1460,7 +1452,7 @@ static void stddev_next_step(SQLFunctionCtx *pCtx) { static void stddev_finalizer(SQLFunctionCtx *pCtx) { SStddevInfo *pStd = (SStddevInfo *)GET_RES_INFO(pCtx)->interResultBuf; - + if (pStd->num <= 0) { setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); } else { @@ -1477,42 +1469,42 @@ static bool first_last_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; } - + // used to keep the timestamp for comparison pCtx->param[1].nType = 0; pCtx->param[1].i64Key = 0; - + return true; } // todo opt for null block static void first_function(SQLFunctionCtx *pCtx) { - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus) || pCtx->order == TSQL_SO_DESC) { + if (pCtx->order == TSQL_SO_DESC) { return; } - + int32_t notNullElems = 0; - + // handle the null value for (int32_t i = 0; i < pCtx->size; ++i) { char *data = 
GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, pCtx->inputType)) { continue; } - + memcpy(pCtx->aOutputBuf, data, pCtx->inputBytes); - + TSKEY k = pCtx->ptsList[i]; DO_UPDATE_TAG_COLUMNS(pCtx, k); - + SResultInfo *pInfo = GET_RES_INFO(pCtx); pInfo->hasResult = DATA_SET_FLAG; pInfo->complete = true; - + notNullElems++; break; } - + SET_VAL(pCtx, notNullElems, 1); } @@ -1520,18 +1512,18 @@ static void first_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->order == TSQL_SO_DESC) { return; } - + void *pData = GET_INPUT_CHAR_INDEX(pCtx, index); if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); memcpy(pCtx->aOutputBuf, pData, pCtx->inputBytes); TSKEY ts = pCtx->ptsList[index]; DO_UPDATE_TAG_COLUMNS(pCtx, ts); - + SResultInfo *pInfo = GET_RES_INFO(pCtx); pInfo->hasResult = DATA_SET_FLAG; pInfo->complete = true; // get the first not-null data, completed @@ -1539,14 +1531,14 @@ static void first_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void first_data_assign_impl(SQLFunctionCtx *pCtx, char *pData, int32_t index) { int64_t *timestamp = pCtx->ptsList; - + SFirstLastInfo *pInfo = (SFirstLastInfo *)(pCtx->aOutputBuf + pCtx->inputBytes); - + if (pInfo->hasResult != DATA_SET_FLAG || timestamp[index] < pInfo->ts) { memcpy(pCtx->aOutputBuf, pData, pCtx->inputBytes); pInfo->hasResult = DATA_SET_FLAG; pInfo->ts = timestamp[index]; - + DO_UPDATE_TAG_COLUMNS(pCtx, pInfo->ts); } } @@ -1559,34 +1551,34 @@ static void first_dist_function(SQLFunctionCtx *pCtx) { if (pCtx->size == 0) { return; } - + /* * do not to check data in the following cases: * 1. data block that are not loaded * 2. 
scan data files in desc order */ - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus) || pCtx->order == TSQL_SO_DESC) { + if (pCtx->order == TSQL_SO_DESC) { return; } - + int32_t notNullElems = 0; - + // find the first not null value for (int32_t i = 0; i < pCtx->size; ++i) { char *data = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, pCtx->inputType)) { continue; } - + first_data_assign_impl(pCtx, data, i); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; - + notNullElems++; break; } - + SET_VAL(pCtx, notNullElems, 1); } @@ -1594,32 +1586,32 @@ static void first_dist_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->size == 0) { return; } - + char *pData = GET_INPUT_CHAR_INDEX(pCtx, index); if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + if (pCtx->order == TSQL_SO_DESC) { return; } - + first_data_assign_impl(pCtx, pData, index); - + SET_VAL(pCtx, 1, 1); } static void first_dist_func_merge(SQLFunctionCtx *pCtx) { char *pData = GET_INPUT_CHAR(pCtx); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pCtx->size == 1 && pResInfo->superTableQ); - + SFirstLastInfo *pInput = (SFirstLastInfo *)(pData + pCtx->inputBytes); if (pInput->hasResult != DATA_SET_FLAG) { return; } - + SFirstLastInfo *pOutput = (SFirstLastInfo *)(pCtx->aOutputBuf + pCtx->inputBytes); if (pOutput->hasResult != DATA_SET_FLAG || pInput->ts < pOutput->ts) { memcpy(pCtx->aOutputBuf, pData, pCtx->inputBytes + sizeof(SFirstLastInfo)); @@ -1629,22 +1621,22 @@ static void first_dist_func_merge(SQLFunctionCtx *pCtx) { static void first_dist_func_second_merge(SQLFunctionCtx *pCtx) { assert(pCtx->resultInfo->superTableQ); - + char * pData = GET_INPUT_CHAR(pCtx); SFirstLastInfo *pInput = (SFirstLastInfo*) (pData + pCtx->outputBytes); if (pInput->hasResult != DATA_SET_FLAG) { return; } - + // The param[1] is used to keep the initial value of max ts value if (pCtx->param[1].nType != pCtx->outputType || pCtx->param[1].i64Key > 
pInput->ts) { memcpy(pCtx->aOutputBuf, pData, pCtx->outputBytes); pCtx->param[1].i64Key = pInput->ts; pCtx->param[1].nType = pCtx->outputType; - + DO_UPDATE_TAG_COLUMNS(pCtx, pInput->ts); } - + SET_VAL(pCtx, 1, 1); GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; } @@ -1658,31 +1650,31 @@ static void first_dist_func_second_merge(SQLFunctionCtx *pCtx) { * least one data in this block that is not null.(TODO opt for this case) */ static void last_function(SQLFunctionCtx *pCtx) { - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus) || pCtx->order == TSQL_SO_ASC) { + if (pCtx->order == TSQL_SO_ASC) { return; } - + int32_t notNullElems = 0; - + for (int32_t i = pCtx->size - 1; i >= 0; --i) { char *data = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, pCtx->inputType)) { continue; } - + memcpy(pCtx->aOutputBuf, data, pCtx->inputBytes); - + TSKEY ts = pCtx->ptsList[i]; DO_UPDATE_TAG_COLUMNS(pCtx, ts); - + SResultInfo *pInfo = GET_RES_INFO(pCtx); pInfo->hasResult = DATA_SET_FLAG; - + pInfo->complete = true; // set query completed on this column notNullElems++; break; } - + SET_VAL(pCtx, notNullElems, 1); } @@ -1690,18 +1682,18 @@ static void last_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->order == TSQL_SO_ASC) { return; } - + void *pData = GET_INPUT_CHAR_INDEX(pCtx, index); if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); memcpy(pCtx->aOutputBuf, pData, pCtx->inputBytes); TSKEY ts = pCtx->ptsList[index]; DO_UPDATE_TAG_COLUMNS(pCtx, ts); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; pResInfo->complete = true; // set query completed @@ -1709,18 +1701,18 @@ static void last_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void last_data_assign_impl(SQLFunctionCtx *pCtx, char *pData, int32_t index) { int64_t *timestamp = pCtx->ptsList; - + SFirstLastInfo *pInfo = (SFirstLastInfo *)(pCtx->aOutputBuf + pCtx->inputBytes); - + if (pInfo->hasResult != DATA_SET_FLAG || 
pInfo->ts < timestamp[index]) { #if defined(_DEBUG_VIEW) pTrace("assign index:%d, ts:%" PRId64 ", val:%d, ", index, timestamp[index], *(int32_t *)pData); #endif - + memcpy(pCtx->aOutputBuf, pData, pCtx->inputBytes); pInfo->hasResult = DATA_SET_FLAG; pInfo->ts = timestamp[index]; - + DO_UPDATE_TAG_COLUMNS(pCtx, pInfo->ts); } } @@ -1729,32 +1721,32 @@ static void last_dist_function(SQLFunctionCtx *pCtx) { if (pCtx->size == 0) { return; } - + /* * 1. for scan data in asc order, no need to check data * 2. for data blocks that are not loaded, no need to check data */ - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus) || pCtx->order == TSQL_SO_ASC) { + if (pCtx->order == TSQL_SO_ASC) { return; } - + int32_t notNullElems = 0; - + for (int32_t i = pCtx->size - 1; i >= 0; --i) { char *data = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, pCtx->inputType)) { continue; } - + last_data_assign_impl(pCtx, data, i); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; - + notNullElems++; break; } - + SET_VAL(pCtx, notNullElems, 1); } @@ -1762,12 +1754,12 @@ static void last_dist_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->size == 0) { return; } - + char *pData = GET_INPUT_CHAR_INDEX(pCtx, index); if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + /* * 1. for scan data in asc order, no need to check data * 2. 
for data blocks that are not loaded, no need to check data @@ -1775,28 +1767,28 @@ static void last_dist_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->order == TSQL_SO_ASC) { return; } - + last_data_assign_impl(pCtx, pData, index); - + SET_VAL(pCtx, 1, 1); } static void last_dist_func_merge(SQLFunctionCtx *pCtx) { char *pData = GET_INPUT_CHAR(pCtx); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pCtx->size == 1 && pResInfo->superTableQ); - + // the input data is null SFirstLastInfo *pInput = (SFirstLastInfo *)(pData + pCtx->inputBytes); if (pInput->hasResult != DATA_SET_FLAG) { return; } - + SFirstLastInfo *pOutput = (SFirstLastInfo *)(pCtx->aOutputBuf + pCtx->inputBytes); if (pOutput->hasResult != DATA_SET_FLAG || pOutput->ts < pInput->ts) { memcpy(pCtx->aOutputBuf, pData, pCtx->inputBytes + sizeof(SFirstLastInfo)); - + DO_UPDATE_TAG_COLUMNS(pCtx, pInput->ts); } } @@ -1808,12 +1800,12 @@ static void last_dist_func_merge(SQLFunctionCtx *pCtx) { */ static void last_dist_func_second_merge(SQLFunctionCtx *pCtx) { char *pData = GET_INPUT_CHAR(pCtx); - + SFirstLastInfo *pInput = (SFirstLastInfo*) (pData + pCtx->outputBytes); if (pInput->hasResult != DATA_SET_FLAG) { return; } - + /* * param[1] used to keep the corresponding timestamp to decide if current result is * the true last result @@ -1822,10 +1814,10 @@ static void last_dist_func_second_merge(SQLFunctionCtx *pCtx) { memcpy(pCtx->aOutputBuf, pData, pCtx->outputBytes); pCtx->param[1].i64Key = pInput->ts; pCtx->param[1].nType = pCtx->outputType; - + DO_UPDATE_TAG_COLUMNS(pCtx, pInput->ts); } - + SET_VAL(pCtx, 1, 1); GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; } @@ -1836,25 +1828,25 @@ static void last_dist_func_second_merge(SQLFunctionCtx *pCtx) { */ static void last_row_function(SQLFunctionCtx *pCtx) { assert(pCtx->size == 1); - + char *pData = GET_INPUT_CHAR(pCtx); assignVal(pCtx->aOutputBuf, pData, pCtx->inputBytes, pCtx->inputType); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + 
SLastrowInfo *pInfo = (SLastrowInfo *)pResInfo->interResultBuf; pInfo->ts = pCtx->param[0].i64Key; pInfo->hasResult = DATA_SET_FLAG; - + // set the result to final result buffer if (pResInfo->superTableQ) { SLastrowInfo *pInfo1 = (SLastrowInfo *)(pCtx->aOutputBuf + pCtx->inputBytes); pInfo1->ts = pCtx->param[0].i64Key; pInfo1->hasResult = DATA_SET_FLAG; - + DO_UPDATE_TAG_COLUMNS(pCtx, pInfo1->ts); } - + SET_VAL(pCtx, pCtx->size, 1); } @@ -1869,7 +1861,7 @@ static void last_row_finalizer(SQLFunctionCtx *pCtx) { } else { // do nothing } - + GET_RES_INFO(pCtx)->numOfRes = 1; doFinalizer(pCtx); } @@ -1880,7 +1872,7 @@ static void valuePairAssign(tValuePair *dst, int16_t type, const char *val, int6 dst->v.nType = type; dst->v.i64Key = *(int64_t *)val; dst->timestamp = tsKey; - + int32_t size = 0; if (stage == SECONDARY_STAGE_MERGE || stage == FIRST_STAGE_MERGE) { memcpy(dst->pTags, pTags, (size_t)pTagInfo->tagsLen); @@ -1908,20 +1900,20 @@ static void do_top_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pData, SExtTagsInfo *pTagInfo, char *pTags, int16_t stage) { tVariant val = {0}; tVariantCreateFromBinary(&val, pData, tDataTypeDesc[type].nSize, type); - + tValuePair **pList = pInfo->res; assert(pList != NULL); if (pInfo->num < maxLen) { if (pInfo->num == 0 || ((type >= TSDB_DATA_TYPE_TINYINT && type <= TSDB_DATA_TYPE_BIGINT) && - val.i64Key >= pList[pInfo->num - 1]->v.i64Key) || + val.i64Key >= pList[pInfo->num - 1]->v.i64Key) || ((type >= TSDB_DATA_TYPE_FLOAT && type <= TSDB_DATA_TYPE_DOUBLE) && - val.dKey >= pList[pInfo->num - 1]->v.dKey)) { + val.dKey >= pList[pInfo->num - 1]->v.dKey)) { valuePairAssign(pList[pInfo->num], type, (const char*)&val.i64Key, ts, pTags, pTagInfo, stage); } else { int32_t i = pInfo->num - 1; - + if (type >= TSDB_DATA_TYPE_TINYINT && type <= TSDB_DATA_TYPE_BIGINT) { while (i >= 0 && pList[i]->v.i64Key > val.i64Key) { VALUEPAIRASSIGN(pList[i + 1], pList[i], pTagInfo->tagsLen); @@ -1933,14 +1925,14 @@ static void 
do_top_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pData, i -= 1; } } - + valuePairAssign(pList[i + 1], type, (const char*) &val.i64Key, ts, pTags, pTagInfo, stage); } - + pInfo->num++; } else { int32_t i = 0; - + if (((type >= TSDB_DATA_TYPE_TINYINT && type <= TSDB_DATA_TYPE_BIGINT) && val.i64Key > pList[0]->v.i64Key) || ((type >= TSDB_DATA_TYPE_FLOAT && type <= TSDB_DATA_TYPE_DOUBLE) && val.dKey > pList[0]->v.dKey)) { // find the appropriate the slot position @@ -1955,7 +1947,7 @@ static void do_top_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pData, i += 1; } } - + valuePairAssign(pList[i], type, (const char*) &val.i64Key, ts, pTags, pTagInfo, stage); } } @@ -1964,16 +1956,16 @@ static void do_top_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pData, static void do_bottom_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pData, int64_t ts, uint16_t type, SExtTagsInfo *pTagInfo, char *pTags, int16_t stage) { tValuePair **pList = pInfo->res; - + tVariant val = {0}; tVariantCreateFromBinary(&val, pData, tDataTypeDesc[type].nSize, type); - + if (pInfo->num < maxLen) { if (pInfo->num == 0) { valuePairAssign(pList[pInfo->num], type, (const char*) &val.i64Key, ts, pTags, pTagInfo, stage); } else { int32_t i = pInfo->num - 1; - + if (type >= TSDB_DATA_TYPE_TINYINT && type <= TSDB_DATA_TYPE_BIGINT) { while (i >= 0 && pList[i]->v.i64Key < val.i64Key) { VALUEPAIRASSIGN(pList[i + 1], pList[i], pTagInfo->tagsLen); @@ -1985,14 +1977,14 @@ static void do_bottom_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pDa i -= 1; } } - + valuePairAssign(pList[i + 1], type, (const char*)&val.i64Key, ts, pTags, pTagInfo, stage); } - + pInfo->num++; } else { int32_t i = 0; - + if (((type >= TSDB_DATA_TYPE_TINYINT && type <= TSDB_DATA_TYPE_BIGINT) && val.i64Key < pList[0]->v.i64Key) || ((type >= TSDB_DATA_TYPE_FLOAT && type <= TSDB_DATA_TYPE_DOUBLE) && val.dKey < pList[0]->v.dKey)) { // find the appropriate the slot position @@ -2007,7 +1999,7 @@ 
static void do_bottom_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pDa i += 1; } } - + valuePairAssign(pList[i], type, (const char*)&val.i64Key, ts, pTags, pTagInfo, stage); } } @@ -2016,7 +2008,7 @@ static void do_bottom_function_add(STopBotInfo *pInfo, int32_t maxLen, void *pDa static int32_t resAscComparFn(const void *pLeft, const void *pRight) { tValuePair *pLeftElem = *(tValuePair **)pLeft; tValuePair *pRightElem = *(tValuePair **)pRight; - + if (pLeftElem->timestamp == pRightElem->timestamp) { return 0; } else { @@ -2029,7 +2021,7 @@ static int32_t resDescComparFn(const void *pLeft, const void *pRight) { return - static int32_t resDataAscComparFn(const void *pLeft, const void *pRight) { tValuePair *pLeftElem = *(tValuePair **)pLeft; tValuePair *pRightElem = *(tValuePair **)pRight; - + int32_t type = pLeftElem->v.nType; if (type == TSDB_DATA_TYPE_FLOAT || type == TSDB_DATA_TYPE_DOUBLE) { if (pLeftElem->v.dKey == pRightElem->v.dKey) { @@ -2051,12 +2043,12 @@ static int32_t resDataDescComparFn(const void *pLeft, const void *pRight) { retu static void copyTopBotRes(SQLFunctionCtx *pCtx, int32_t type) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); STopBotInfo *pRes = pResInfo->interResultBuf; - + tValuePair **tvp = pRes->res; int32_t step = QUERY_ASC_FORWARD_STEP; int32_t len = GET_RES_INFO(pCtx)->numOfRes; - + switch (type) { case TSDB_DATA_TYPE_INT: { int32_t *output = (int32_t *)pCtx->aOutputBuf; @@ -2105,20 +2097,20 @@ static void copyTopBotRes(SQLFunctionCtx *pCtx, int32_t type) { return; } } - + // set the output timestamp of each record. 
TSKEY *output = pCtx->ptsOutputBuf; for (int32_t i = 0; i < len; ++i, output += step) { *output = tvp[i]->timestamp; } - + // set the corresponding tag data for each record // todo check malloc failure char **pData = calloc(pCtx->tagInfo.numOfTagCols, POINTER_BYTES); for (int32_t i = 0; i < pCtx->tagInfo.numOfTagCols; ++i) { pData[i] = pCtx->tagInfo.pTagCtxList[i]->aOutputBuf; } - + for (int32_t i = 0; i < len; ++i, output += step) { int16_t offset = 0; for (int32_t j = 0; j < pCtx->tagInfo.numOfTagCols; ++j) { @@ -2127,22 +2119,22 @@ static void copyTopBotRes(SQLFunctionCtx *pCtx, int32_t type) { pData[j] += pCtx->tagInfo.pTagCtxList[j]->outputBytes; } } - + tfree(pData); } bool top_bot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, char *minval, char *maxval) { STopBotInfo *pTopBotInfo = (STopBotInfo *)GET_RES_INFO(pCtx)->interResultBuf; - + int32_t numOfExistsRes = pTopBotInfo->num; - + // required number of results are not reached, continue load data block if (numOfExistsRes < pCtx->param[0].i64Key) { return true; } - + tValuePair *pRes = (tValuePair*) pTopBotInfo->res; - + if (functionId == TSDB_FUNC_TOP) { switch (pCtx->inputType) { case TSDB_DATA_TYPE_TINYINT: @@ -2190,7 +2182,7 @@ bool top_bot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, char *mi */ static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + // only the first_stage_merge is directly written data into final output buffer if (pResInfo->superTableQ && pCtx->currentStage != SECONDARY_STAGE_MERGE) { return (STopBotInfo*) pCtx->aOutputBuf; @@ -2208,11 +2200,11 @@ static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) { static void buildTopBotStruct(STopBotInfo *pTopBotInfo, SQLFunctionCtx *pCtx) { char *tmp = (char *)pTopBotInfo + sizeof(STopBotInfo); pTopBotInfo->res = (tValuePair**) tmp; - + tmp += POINTER_BYTES * pCtx->param[0].i64Key; - + size_t size = sizeof(tValuePair) + pCtx->tagInfo.tagsLen; - + for 
(int32_t i = 0; i < pCtx->param[0].i64Key; ++i) { pTopBotInfo->res[i] = (tValuePair*) tmp; pTopBotInfo->res[i]->pTags = tmp + sizeof(tValuePair); @@ -2224,36 +2216,36 @@ static bool top_bottom_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; } - + STopBotInfo *pInfo = getTopBotOutputInfo(pCtx); buildTopBotStruct(pInfo, pCtx); - + return true; } static void top_function(SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; - + STopBotInfo *pRes = getTopBotOutputInfo(pCtx); assert(pRes->num >= 0); - + for (int32_t i = 0; i < pCtx->size; ++i) { char *data = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, pCtx->inputType)) { continue; } - + notNullElems++; do_top_function_add(pRes, pCtx->param[0].i64Key, data, pCtx->ptsList[i], pCtx->inputType, &pCtx->tagInfo, NULL, 0); } - + if (!pCtx->hasNull) { assert(pCtx->size == notNullElems); } - + // treat the result as only one result SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; @@ -2265,34 +2257,34 @@ static void top_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + STopBotInfo *pRes = getTopBotOutputInfo(pCtx); assert(pRes->num >= 0); - + SET_VAL(pCtx, 1, 1); do_top_function_add(pRes, pCtx->param[0].i64Key, pData, pCtx->ptsList[index], pCtx->inputType, &pCtx->tagInfo, NULL, 0); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; } static void top_func_merge(SQLFunctionCtx *pCtx) { char *input = GET_INPUT_CHAR(pCtx); - + STopBotInfo *pInput = (STopBotInfo *)input; if (pInput->num <= 0) { return; } - + // remmap the input buffer may cause the struct pointer invalid, so rebuild the STopBotInfo is necessary buildTopBotStruct(pInput, pCtx); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ && pCtx->outputType == TSDB_DATA_TYPE_BINARY && pCtx->size == 1); - + 
STopBotInfo *pOutput = getTopBotOutputInfo(pCtx); - + for (int32_t i = 0; i < pInput->num; ++i) { do_top_function_add(pOutput, pCtx->param[0].i64Key, &pInput->res[i]->v.i64Key, pInput->res[i]->timestamp, pCtx->inputType, &pCtx->tagInfo, pInput->res[i]->pTags, pCtx->currentStage); @@ -2301,20 +2293,20 @@ static void top_func_merge(SQLFunctionCtx *pCtx) { static void top_func_second_merge(SQLFunctionCtx *pCtx) { STopBotInfo *pInput = (STopBotInfo *)GET_INPUT_CHAR(pCtx); - + // construct the input data struct from binary data buildTopBotStruct(pInput, pCtx); - + STopBotInfo *pOutput = getTopBotOutputInfo(pCtx); - + // the intermediate result is binary, we only use the output data type for (int32_t i = 0; i < pInput->num; ++i) { do_top_function_add(pOutput, pCtx->param[0].i64Key, &pInput->res[i]->v.i64Key, pInput->res[i]->timestamp, pCtx->outputType, &pCtx->tagInfo, pInput->res[i]->pTags, pCtx->currentStage); } - + SET_VAL(pCtx, pInput->num, pOutput->num); - + if (pOutput->num > 0) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; @@ -2323,27 +2315,27 @@ static void top_func_second_merge(SQLFunctionCtx *pCtx) { static void bottom_function(SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; - + STopBotInfo *pRes = getTopBotOutputInfo(pCtx); - + for (int32_t i = 0; i < pCtx->size; ++i) { char *data = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, pCtx->inputType)) { continue; } - + notNullElems++; do_bottom_function_add(pRes, pCtx->param[0].i64Key, data, pCtx->ptsList[i], pCtx->inputType, &pCtx->tagInfo, NULL, 0); } - + if (!pCtx->hasNull) { assert(pCtx->size == notNullElems); } - + // treat the result as only one result SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; @@ -2355,32 +2347,32 @@ static void bottom_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + 
STopBotInfo *pRes = getTopBotOutputInfo(pCtx); SET_VAL(pCtx, 1, 1); do_bottom_function_add(pRes, pCtx->param[0].i64Key, pData, pCtx->ptsList[index], pCtx->inputType, &pCtx->tagInfo, NULL, 0); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; } static void bottom_func_merge(SQLFunctionCtx *pCtx) { char *input = GET_INPUT_CHAR(pCtx); - + STopBotInfo *pInput = (STopBotInfo *)input; if (pInput->num <= 0) { return; } - + // remmap the input buffer may cause the struct pointer invalid, so rebuild the STopBotInfo is necessary buildTopBotStruct(pInput, pCtx); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ && pCtx->outputType == TSDB_DATA_TYPE_BINARY && pCtx->size == 1); - + STopBotInfo *pOutput = getTopBotOutputInfo(pCtx); - + for (int32_t i = 0; i < pInput->num; ++i) { do_bottom_function_add(pOutput, pCtx->param[0].i64Key, &pInput->res[i]->v.i64Key, pInput->res[i]->timestamp, pCtx->inputType, &pCtx->tagInfo, pInput->res[i]->pTags, pCtx->currentStage); @@ -2389,20 +2381,20 @@ static void bottom_func_merge(SQLFunctionCtx *pCtx) { static void bottom_func_second_merge(SQLFunctionCtx *pCtx) { STopBotInfo *pInput = (STopBotInfo *)GET_INPUT_CHAR(pCtx); - + // construct the input data struct from binary data buildTopBotStruct(pInput, pCtx); - + STopBotInfo *pOutput = getTopBotOutputInfo(pCtx); - + // the intermediate result is binary, we only use the output data type for (int32_t i = 0; i < pInput->num; ++i) { do_bottom_function_add(pOutput, pCtx->param[0].i64Key, &pInput->res[i]->v.i64Key, pInput->res[i]->timestamp, pCtx->outputType, &pCtx->tagInfo, pInput->res[i]->pTags, pCtx->currentStage); } - + SET_VAL(pCtx, pInput->num, pOutput->num); - + if (pOutput->num > 0) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; @@ -2411,17 +2403,17 @@ static void bottom_func_second_merge(SQLFunctionCtx *pCtx) { static void top_bottom_func_finalizer(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = 
GET_RES_INFO(pCtx); - + // data in temporary list is less than the required number of results, not enough qualified number of results STopBotInfo *pRes = pResInfo->interResultBuf; if (pRes->num == 0) { // no result assert(pResInfo->hasResult != DATA_SET_FLAG); // TODO: } - + GET_RES_INFO(pCtx)->numOfRes = pRes->num; tValuePair **tvp = pRes->res; - + // user specify the order of output by sort the result according to timestamp if (pCtx->param[1].i64Key == PRIMARYKEY_TIMESTAMP_COL_INDEX) { __compar_fn_t comparator = (pCtx->param[2].i64Key == TSQL_SO_ASC) ? resAscComparFn : resDescComparFn; @@ -2430,7 +2422,7 @@ static void top_bottom_func_finalizer(SQLFunctionCtx *pCtx) { __compar_fn_t comparator = (pCtx->param[2].i64Key == TSQL_SO_ASC) ? resDataAscComparFn : resDataDescComparFn; qsort(tvp, pResInfo->numOfRes, POINTER_BYTES, comparator); } - + GET_TRUE_DATA_TYPE(); copyTopBotRes(pCtx, type); @@ -2441,42 +2433,42 @@ static void top_bottom_func_finalizer(SQLFunctionCtx *pCtx) { static bool percentile_function_setup(SQLFunctionCtx *pCtx) { const int32_t MAX_AVAILABLE_BUFFER_SIZE = 1 << 20; // 1MB const int32_t NUMOFCOLS = 1; - + if (!function_setup(pCtx)) { return false; } - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); SSchema field[1] = {{pCtx->inputType, "dummyCol", 0, pCtx->inputBytes}}; - + SColumnModel *pModel = createColumnModel(field, 1, 1000); int32_t orderIdx = 0; - + // tOrderDesc object tOrderDescriptor *pDesc = tOrderDesCreate(&orderIdx, NUMOFCOLS, pModel, TSQL_SO_DESC); - + ((SPercentileInfo *)(pResInfo->interResultBuf))->pMemBucket = tMemBucketCreate(1024, MAX_AVAILABLE_BUFFER_SIZE, pCtx->inputBytes, pCtx->inputType, pDesc); - + return true; } static void percentile_function(SQLFunctionCtx *pCtx) { int32_t notNullElems = 0; - + SResultInfo * pResInfo = GET_RES_INFO(pCtx); SPercentileInfo *pInfo = pResInfo->interResultBuf; - + for (int32_t i = 0; i < pCtx->size; ++i) { char *data = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, 
pCtx->inputType)) { continue; } - + notNullElems += 1; tMemBucketPut(pInfo->pMemBucket, data, 1); } - + SET_VAL(pCtx, notNullElems, 1); pResInfo->hasResult = DATA_SET_FLAG; } @@ -2486,28 +2478,28 @@ static void percentile_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + SPercentileInfo *pInfo = (SPercentileInfo *)pResInfo->interResultBuf; tMemBucketPut(pInfo->pMemBucket, pData, 1); - + SET_VAL(pCtx, 1, 1); pResInfo->hasResult = DATA_SET_FLAG; } static void percentile_finalizer(SQLFunctionCtx *pCtx) { double v = pCtx->param[0].nType == TSDB_DATA_TYPE_INT ? pCtx->param[0].i64Key : pCtx->param[0].dKey; - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); tMemBucket * pMemBucket = ((SPercentileInfo *)pResInfo->interResultBuf)->pMemBucket; - + if (pMemBucket->numOfElems > 0) { // check for null *(double *)pCtx->aOutputBuf = getPercentile(pMemBucket, v); } else { setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); } - + tOrderDescDestroy(pMemBucket->pOrderDesc); tMemBucketDestroy(pMemBucket); @@ -2517,7 +2509,7 @@ static void percentile_finalizer(SQLFunctionCtx *pCtx) { ////////////////////////////////////////////////////////////////////////////////// static SAPercentileInfo *getAPerctInfo(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + if (pResInfo->superTableQ && pCtx->currentStage != SECONDARY_STAGE_MERGE) { return (SAPercentileInfo*) pCtx->aOutputBuf; } else { @@ -2529,9 +2521,9 @@ static bool apercentile_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; } - + SAPercentileInfo *pInfo = getAPerctInfo(pCtx); - + char *tmp = (char *)pInfo + sizeof(SAPercentileInfo); pInfo->pHisto = tHistogramCreateFrom(tmp, MAX_HISTOGRAM_BIN); return true; @@ -2539,19 +2531,19 @@ static bool apercentile_function_setup(SQLFunctionCtx *pCtx) { static void apercentile_function(SQLFunctionCtx *pCtx) { int32_t 
notNullElems = 0; - + SResultInfo * pResInfo = GET_RES_INFO(pCtx); SAPercentileInfo *pInfo = getAPerctInfo(pCtx); - + for (int32_t i = 0; i < pCtx->size; ++i) { char *data = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(data, pCtx->inputType)) { continue; } - + notNullElems += 1; double v = 0; - + switch (pCtx->inputType) { case TSDB_DATA_TYPE_TINYINT: v = GET_INT8_VAL(data); @@ -2572,16 +2564,16 @@ static void apercentile_function(SQLFunctionCtx *pCtx) { v = GET_INT32_VAL(data); break; } - + tHistogramAdd(&pInfo->pHisto, v); } - + if (!pCtx->hasNull) { assert(pCtx->size == notNullElems); } - + SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { pResInfo->hasResult = DATA_SET_FLAG; } @@ -2592,10 +2584,10 @@ static void apercentile_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SResultInfo * pResInfo = GET_RES_INFO(pCtx); SAPercentileInfo *pInfo = getAPerctInfo(pCtx); // pResInfo->interResultBuf; - + double v = 0; switch (pCtx->inputType) { case TSDB_DATA_TYPE_TINYINT: @@ -2617,9 +2609,9 @@ static void apercentile_function_f(SQLFunctionCtx *pCtx, int32_t index) { v = GET_INT32_VAL(pData); break; } - + tHistogramAdd(&pInfo->pHisto, v); - + SET_VAL(pCtx, 1, 1); pResInfo->hasResult = DATA_SET_FLAG; } @@ -2627,62 +2619,62 @@ static void apercentile_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void apercentile_func_merge(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ); - + SAPercentileInfo *pInput = (SAPercentileInfo *)GET_INPUT_CHAR(pCtx); - + pInput->pHisto = (SHistogramInfo*) ((char *)pInput + sizeof(SAPercentileInfo)); pInput->pHisto->elems = (SHistBin*) ((char *)pInput->pHisto + sizeof(SHistogramInfo)); - + if (pInput->pHisto->numOfElems <= 0) { return; } - + size_t size = sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1); - + SAPercentileInfo *pOutput = getAPerctInfo(pCtx); //(SAPercentileInfo 
*)pCtx->aOutputBuf; SHistogramInfo * pHisto = pOutput->pHisto; - + if (pHisto->numOfElems <= 0) { memcpy(pHisto, pInput->pHisto, size); pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); } else { pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); - + SHistogramInfo *pRes = tHistogramMerge(pHisto, pInput->pHisto, MAX_HISTOGRAM_BIN); memcpy(pHisto, pRes, sizeof(SHistogramInfo) + sizeof(SHistBin) * MAX_HISTOGRAM_BIN); pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); - + tHistogramDestroy(&pRes); } - + SET_VAL(pCtx, 1, 1); pResInfo->hasResult = DATA_SET_FLAG; } static void apercentile_func_second_merge(SQLFunctionCtx *pCtx) { SAPercentileInfo *pInput = (SAPercentileInfo *)GET_INPUT_CHAR(pCtx); - + pInput->pHisto = (SHistogramInfo*) ((char *)pInput + sizeof(SAPercentileInfo)); pInput->pHisto->elems = (SHistBin*) ((char *)pInput->pHisto + sizeof(SHistogramInfo)); - + if (pInput->pHisto->numOfElems <= 0) { return; } - + SAPercentileInfo *pOutput = getAPerctInfo(pCtx); SHistogramInfo * pHisto = pOutput->pHisto; - + if (pHisto->numOfElems <= 0) { memcpy(pHisto, pInput->pHisto, sizeof(SHistogramInfo) + sizeof(SHistBin) * (MAX_HISTOGRAM_BIN + 1)); pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); } else { pHisto->elems = (SHistBin*) ((char *)pHisto + sizeof(SHistogramInfo)); - + SHistogramInfo *pRes = tHistogramMerge(pHisto, pInput->pHisto, MAX_HISTOGRAM_BIN); tHistogramDestroy(&pOutput->pHisto); pOutput->pHisto = pRes; } - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); pResInfo->hasResult = DATA_SET_FLAG; SET_VAL(pCtx, 1, 1); @@ -2690,17 +2682,17 @@ static void apercentile_func_second_merge(SQLFunctionCtx *pCtx) { static void apercentile_finalizer(SQLFunctionCtx *pCtx) { double v = (pCtx->param[0].nType == TSDB_DATA_TYPE_INT) ? 
pCtx->param[0].i64Key : pCtx->param[0].dKey; - + SResultInfo * pResInfo = GET_RES_INFO(pCtx); SAPercentileInfo *pOutput = pResInfo->interResultBuf; - + if (pCtx->currentStage == SECONDARY_STAGE_MERGE) { if (pResInfo->hasResult == DATA_SET_FLAG) { // check for null assert(pOutput->pHisto->numOfElems > 0); - + double ratio[] = {v}; double *res = tHistogramUniform(pOutput->pHisto, ratio, 1); - + memcpy(pCtx->aOutputBuf, res, sizeof(double)); free(res); } else { @@ -2710,7 +2702,7 @@ static void apercentile_finalizer(SQLFunctionCtx *pCtx) { } else { if (pOutput->pHisto->numOfElems > 0) { double ratio[] = {v}; - + double *res = tHistogramUniform(pOutput->pHisto, ratio, 1); memcpy(pCtx->aOutputBuf, res, sizeof(double)); free(res); @@ -2728,10 +2720,10 @@ static bool leastsquares_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; } - + SResultInfo * pResInfo = GET_RES_INFO(pCtx); SLeastsquareInfo *pInfo = pResInfo->interResultBuf; - + // 2*3 matrix pInfo->startVal = pCtx->param[0].dKey; return true; @@ -2758,12 +2750,12 @@ static bool leastsquares_function_setup(SQLFunctionCtx *pCtx) { static void leastsquares_function(SQLFunctionCtx *pCtx) { SResultInfo * pResInfo = GET_RES_INFO(pCtx); SLeastsquareInfo *pInfo = pResInfo->interResultBuf; - + double(*param)[3] = pInfo->mat; double x = pInfo->startVal; - + void *pData = GET_INPUT_CHAR(pCtx); - + int32_t numOfElem = 0; switch (pCtx->inputType) { case TSDB_DATA_TYPE_INT: { @@ -2773,12 +2765,12 @@ static void leastsquares_function(SQLFunctionCtx *pCtx) { if (pCtx->hasNull && isNull((const char*) p, pCtx->inputType)) { continue; } - + param[0][0] += x * x; param[0][1] += x; param[0][2] += x * p[i]; param[1][2] += p[i]; - + x += pCtx->param[1].dKey; numOfElem++; } @@ -2810,14 +2802,14 @@ static void leastsquares_function(SQLFunctionCtx *pCtx) { break; }; } - + pInfo->startVal = x; pInfo->num += numOfElem; - + if (pInfo->num > 0) { pResInfo->hasResult = DATA_SET_FLAG; } - + SET_VAL(pCtx, numOfElem, 
1); } @@ -2826,12 +2818,12 @@ static void leastsquares_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SResultInfo * pResInfo = GET_RES_INFO(pCtx); SLeastsquareInfo *pInfo = pResInfo->interResultBuf; - + double(*param)[3] = pInfo->mat; - + switch (pCtx->inputType) { case TSDB_DATA_TYPE_INT: { int32_t *p = pData; @@ -2866,10 +2858,10 @@ static void leastsquares_function_f(SQLFunctionCtx *pCtx, int32_t index) { default: pError("error data type in leastsquare function:%d", pCtx->inputType); }; - + SET_VAL(pCtx, 1, 1); pInfo->num += 1; - + if (pInfo->num > 0) { pResInfo->hasResult = DATA_SET_FLAG; } @@ -2879,26 +2871,26 @@ static void leastsquares_finalizer(SQLFunctionCtx *pCtx) { // no data in query SResultInfo * pResInfo = GET_RES_INFO(pCtx); SLeastsquareInfo *pInfo = pResInfo->interResultBuf; - + if (pInfo->num == 0) { setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); return; } - + double(*param)[3] = pInfo->mat; - + param[1][1] = pInfo->num; param[1][0] = param[0][1]; - + param[0][0] -= param[1][0] * (param[0][1] / param[1][1]); param[0][2] -= param[1][2] * (param[0][1] / param[1][1]); param[0][1] = 0; param[1][2] -= param[0][2] * (param[1][0] / param[0][0]); param[1][0] = 0; param[0][2] /= param[0][0]; - + param[1][2] /= param[1][1]; - + sprintf(pCtx->aOutputBuf, "(%lf, %lf)", param[0][2], param[1][2]); doFinalizer(pCtx); } @@ -2907,7 +2899,7 @@ static void date_col_output_function(SQLFunctionCtx *pCtx) { if (pCtx->scanFlag == SUPPLEMENTARY_SCAN) { return; } - + SET_VAL(pCtx, pCtx->size, 1); *(int64_t *)(pCtx->aOutputBuf) = pCtx->nStartQueryTimestamp; } @@ -2918,32 +2910,32 @@ static FORCE_INLINE void date_col_output_function_f(SQLFunctionCtx *pCtx, int32_ static void col_project_function(SQLFunctionCtx *pCtx) { INC_INIT_VAL(pCtx, pCtx->size); - + char *pData = GET_INPUT_CHAR(pCtx); if (pCtx->order == TSQL_SO_ASC) { memcpy(pCtx->aOutputBuf, pData, (size_t)pCtx->size * 
pCtx->inputBytes); } else { for(int32_t i = 0; i < pCtx->size; ++i) { memcpy(pCtx->aOutputBuf + (pCtx->size - 1 - i) * pCtx->inputBytes, pData + i * pCtx->inputBytes, - pCtx->inputBytes); + pCtx->inputBytes); } } - + pCtx->aOutputBuf += pCtx->size * pCtx->outputBytes; } static void col_project_function_f(SQLFunctionCtx *pCtx, int32_t index) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + // only one output if (pCtx->param[0].i64Key == 1 && pResInfo->numOfRes >= 1) { return; } - + INC_INIT_VAL(pCtx, 1); char *pData = GET_INPUT_CHAR_INDEX(pCtx, index); memcpy(pCtx->aOutputBuf, pData, pCtx->inputBytes); - + pCtx->aOutputBuf += pCtx->inputBytes/* * GET_FORWARD_DIRECTION_FACTOR(pCtx->order)*/; } @@ -2954,9 +2946,9 @@ static void col_project_function_f(SQLFunctionCtx *pCtx, int32_t index) { */ static void tag_project_function(SQLFunctionCtx *pCtx) { INC_INIT_VAL(pCtx, pCtx->size); - + assert(pCtx->inputBytes == pCtx->outputBytes); - + for (int32_t i = 0; i < pCtx->size; ++i) { tVariantDump(&pCtx->tag, pCtx->aOutputBuf, pCtx->outputType); pCtx->aOutputBuf += pCtx->outputBytes; @@ -2988,7 +2980,7 @@ static void tag_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void copy_function(SQLFunctionCtx *pCtx) { SET_VAL(pCtx, pCtx->size, 1); - + char *pData = GET_INPUT_CHAR(pCtx); assignVal(pCtx->aOutputBuf, pData, pCtx->inputBytes, pCtx->inputType); } @@ -3001,7 +2993,7 @@ static bool diff_function_setup(SQLFunctionCtx *pCtx) { if (function_setup(pCtx)) { return false; } - + // diff function require the value is set to -1 pCtx->param[1].nType = INITIAL_VALUE_NOT_ASSIGNED; return false; @@ -3011,41 +3003,41 @@ static bool diff_function_setup(SQLFunctionCtx *pCtx) { static void diff_function(SQLFunctionCtx *pCtx) { void *data = GET_INPUT_CHAR(pCtx); bool isFirstBlock = (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED); - + int32_t notNullElems = 0; - + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pCtx->order); int32_t i = (pCtx->order == TSQL_SO_ASC) ? 
0 : pCtx->size - 1; TSKEY * pTimestamp = pCtx->ptsOutputBuf; - + switch (pCtx->inputType) { case TSDB_DATA_TYPE_INT: { int32_t *pData = (int32_t *)data; int32_t *pOutput = (int32_t *)pCtx->aOutputBuf; - + for (; i < pCtx->size && i >= 0; i += step) { if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { continue; } - + if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; } else if ((i == 0 && pCtx->order == TSQL_SO_ASC) || (i == pCtx->size - 1 && pCtx->order == TSQL_SO_DESC)) { *pOutput = pData[i] - pCtx->param[1].i64Key; *pTimestamp = pCtx->ptsList[i]; - + pOutput += 1; pTimestamp += 1; } else { *pOutput = pData[i] - pData[i - step]; *pTimestamp = pCtx->ptsList[i]; - + pOutput += 1; pTimestamp += 1; } - + pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; notNullElems++; @@ -3055,29 +3047,29 @@ static void diff_function(SQLFunctionCtx *pCtx) { case TSDB_DATA_TYPE_BIGINT: { int64_t *pData = (int64_t *)data; int64_t *pOutput = (int64_t *)pCtx->aOutputBuf; - + for (; i < pCtx->size && i >= 0; i += step) { if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { continue; } - + if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; } else if ((i == 0 && pCtx->order == TSQL_SO_ASC) || (i == pCtx->size - 1 && pCtx->order == TSQL_SO_DESC)) { *pOutput = pData[i] - pCtx->param[1].i64Key; *pTimestamp = pCtx->ptsList[i]; - + pOutput += 1; pTimestamp += 1; } else { *pOutput = pData[i] - pData[i - step]; *pTimestamp = pCtx->ptsList[i]; - + pOutput += 1; pTimestamp += 1; } - + pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; notNullElems++; @@ -3087,12 +3079,12 @@ static void diff_function(SQLFunctionCtx *pCtx) { case TSDB_DATA_TYPE_DOUBLE: { double *pData = (double *)data; double 
*pOutput = (double *)pCtx->aOutputBuf; - + for (; i < pCtx->size && i >= 0; i += step) { if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { continue; } - + if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet pCtx->param[1].dKey = pData[i]; pCtx->param[1].nType = pCtx->inputType; @@ -3107,7 +3099,7 @@ static void diff_function(SQLFunctionCtx *pCtx) { pOutput += 1; pTimestamp += 1; } - + pCtx->param[1].dKey = pData[i]; pCtx->param[1].nType = pCtx->inputType; notNullElems++; @@ -3117,12 +3109,12 @@ static void diff_function(SQLFunctionCtx *pCtx) { case TSDB_DATA_TYPE_FLOAT: { float *pData = (float *)data; float *pOutput = (float *)pCtx->aOutputBuf; - + for (; i < pCtx->size && i >= 0; i += step) { if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { continue; } - + if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet pCtx->param[1].dKey = pData[i]; pCtx->param[1].nType = pCtx->inputType; @@ -3139,7 +3131,7 @@ static void diff_function(SQLFunctionCtx *pCtx) { pOutput += 1; pTimestamp += 1; } - + // keep the last value, the remain may be all null pCtx->param[1].dKey = pData[i]; pCtx->param[1].nType = pCtx->inputType; @@ -3150,12 +3142,12 @@ static void diff_function(SQLFunctionCtx *pCtx) { case TSDB_DATA_TYPE_SMALLINT: { int16_t *pData = (int16_t *)data; int16_t *pOutput = (int16_t *)pCtx->aOutputBuf; - + for (; i < pCtx->size && i >= 0; i += step) { if (pCtx->hasNull && isNull((const char*) &pData[i], pCtx->inputType)) { continue; } - + if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; @@ -3171,7 +3163,7 @@ static void diff_function(SQLFunctionCtx *pCtx) { pOutput += 1; pTimestamp += 1; } - + pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; notNullElems++; @@ -3181,12 +3173,12 @@ static void 
diff_function(SQLFunctionCtx *pCtx) { case TSDB_DATA_TYPE_TINYINT: { int8_t *pData = (int8_t *)data; int8_t *pOutput = (int8_t *)pCtx->aOutputBuf; - + for (; i < pCtx->size && i >= 0; i += step) { if (pCtx->hasNull && isNull((char *)&pData[i], pCtx->inputType)) { continue; } - + if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; @@ -3203,7 +3195,7 @@ static void diff_function(SQLFunctionCtx *pCtx) { pOutput += 1; pTimestamp += 1; } - + pCtx->param[1].i64Key = pData[i]; pCtx->param[1].nType = pCtx->inputType; notNullElems++; @@ -3213,7 +3205,7 @@ static void diff_function(SQLFunctionCtx *pCtx) { default: pError("error input type"); } - + // initial value is not set yet if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED || notNullElems <= 0) { /* @@ -3223,9 +3215,9 @@ static void diff_function(SQLFunctionCtx *pCtx) { assert(pCtx->hasNull); } else { int32_t forwardStep = (isFirstBlock) ? 
notNullElems - 1 : notNullElems; - + GET_RES_INFO(pCtx)->numOfRes += forwardStep; - + pCtx->aOutputBuf += forwardStep * pCtx->outputBytes; pCtx->ptsOutputBuf = (char*)pCtx->ptsOutputBuf + forwardStep * TSDB_KEYSIZE; } @@ -3248,14 +3240,14 @@ static void diff_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + // the output start from the second source element if (pCtx->param[1].nType != INITIAL_VALUE_NOT_ASSIGNED) { // initial value is set GET_RES_INFO(pCtx)->numOfRes += 1; } - + int32_t step = 1/*GET_FORWARD_DIRECTION_FACTOR(pCtx->order)*/; - + switch (pCtx->inputType) { case TSDB_DATA_TYPE_INT: { if (pCtx->param[1].nType == INITIAL_VALUE_NOT_ASSIGNED) { // initial value is not set yet @@ -3291,7 +3283,7 @@ static void diff_function_f(SQLFunctionCtx *pCtx, int32_t index) { default: pError("error input type"); } - + if (GET_RES_INFO(pCtx)->numOfRes > 0) { pCtx->aOutputBuf += pCtx->outputBytes * step; pCtx->ptsOutputBuf = (char *)pCtx->ptsOutputBuf + TSDB_KEYSIZE * step; @@ -3300,17 +3292,17 @@ static void diff_function_f(SQLFunctionCtx *pCtx, int32_t index) { char *arithmetic_callback_function(void *param, char *name, int32_t colId) { SArithmeticSupport *pSupport = (SArithmeticSupport *)param; - + SSqlFunctionExpr *pExpr = pSupport->pExpr; int32_t colIndexInBuf = -1; - - for (int32_t i = 0; i < pExpr->pBinExprInfo.numOfCols; ++i) { - if (colId == pExpr->pBinExprInfo.pReqColumns[i].colId) { - colIndexInBuf = pExpr->pBinExprInfo.pReqColumns[i].colIdxInBuf; + + for (int32_t i = 0; i < pExpr->binExprInfo.numOfCols; ++i) { + if (colId == pExpr->binExprInfo.pReqColumns[i].colId) { + colIndexInBuf = pExpr->binExprInfo.pReqColumns[i].colIdxInBuf; break; } } - + assert(colIndexInBuf >= 0 && colId >= 0); return pSupport->data[colIndexInBuf] + pSupport->offset * pSupport->elemSize[colIndexInBuf]; } @@ -3318,10 +3310,10 @@ char *arithmetic_callback_function(void *param, char *name, int32_t colId) { static void 
arithmetic_function(SQLFunctionCtx *pCtx) { GET_RES_INFO(pCtx)->numOfRes += pCtx->size; SArithmeticSupport *sas = (SArithmeticSupport *)pCtx->param[1].pz; - - tSQLBinaryExprCalcTraverse(sas->pExpr->pBinExprInfo.pBinExpr, pCtx->size, pCtx->aOutputBuf, sas, pCtx->order, + + tSQLBinaryExprCalcTraverse(sas->pExpr->binExprInfo.pBinExpr, pCtx->size, pCtx->aOutputBuf, sas, pCtx->order, arithmetic_callback_function); - + pCtx->aOutputBuf += pCtx->outputBytes * pCtx->size; pCtx->param[1].pz = NULL; } @@ -3329,11 +3321,11 @@ static void arithmetic_function(SQLFunctionCtx *pCtx) { static void arithmetic_function_f(SQLFunctionCtx *pCtx, int32_t index) { INC_INIT_VAL(pCtx, 1); SArithmeticSupport *sas = (SArithmeticSupport *)pCtx->param[1].pz; - + sas->offset = index; - tSQLBinaryExprCalcTraverse(sas->pExpr->pBinExprInfo.pBinExpr, 1, pCtx->aOutputBuf, sas, pCtx->order, + tSQLBinaryExprCalcTraverse(sas->pExpr->binExprInfo.pBinExpr, 1, pCtx->aOutputBuf, sas, pCtx->order, arithmetic_callback_function); - + pCtx->aOutputBuf += pCtx->outputBytes/* * GET_FORWARD_DIRECTION_FACTOR(pCtx->order)*/; } @@ -3359,9 +3351,9 @@ static bool spread_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; } - + SSpreadInfo *pInfo = GET_RES_INFO(pCtx)->interResultBuf; - + // this is the server-side setup function in client-side, the secondary merge do not need this procedure if (pCtx->currentStage == SECONDARY_STAGE_MERGE) { pCtx->param[0].dKey = DBL_MAX; @@ -3370,60 +3362,56 @@ static bool spread_function_setup(SQLFunctionCtx *pCtx) { pInfo->min = DBL_MAX; pInfo->max = -DBL_MAX; } - + return true; } static void spread_function(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); SSpreadInfo *pInfo = pResInfo->interResultBuf; - + int32_t numOfElems = pCtx->size; - + // column missing cause the hasNull to be true - if (!IS_DATA_BLOCK_LOADED(pCtx->blockStatus)) { - if (pCtx->preAggVals.isSet) { - numOfElems = pCtx->size - pCtx->preAggVals.numOfNull; - - // all 
data are null in current data block, ignore current data block - if (numOfElems == 0) { - goto _spread_over; + if (usePreVal(pCtx)) { + numOfElems = pCtx->size - pCtx->preAggVals.statis.numOfNull; + + // all data are null in current data block, ignore current data block + if (numOfElems == 0) { + goto _spread_over; + } + + if ((pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) || + (pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP)) { + if (pInfo->min > pCtx->preAggVals.statis.min) { + pInfo->min = pCtx->preAggVals.statis.min; } - - if ((pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) || - (pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP)) { - if (pInfo->min > pCtx->preAggVals.min) { - pInfo->min = pCtx->preAggVals.min; - } - - if (pInfo->max < pCtx->preAggVals.max) { - pInfo->max = pCtx->preAggVals.max; - } - } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE || pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { - if (pInfo->min > GET_DOUBLE_VAL(&(pCtx->preAggVals.min))) { - pInfo->min = GET_DOUBLE_VAL(&(pCtx->preAggVals.min)); - } - - if (pInfo->max < GET_DOUBLE_VAL(&(pCtx->preAggVals.max))) { - pInfo->max = GET_DOUBLE_VAL(&(pCtx->preAggVals.max)); - } + + if (pInfo->max < pCtx->preAggVals.statis.max) { + pInfo->max = pCtx->preAggVals.statis.max; } - } else { - if (pInfo->min > pCtx->param[1].dKey) { - pInfo->min = pCtx->param[1].dKey; + } else if (pCtx->inputType == TSDB_DATA_TYPE_DOUBLE || pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { + if (pInfo->min > GET_DOUBLE_VAL(&(pCtx->preAggVals.statis.min))) { + pInfo->min = GET_DOUBLE_VAL(&(pCtx->preAggVals.statis.min)); } - - if (pInfo->max < pCtx->param[2].dKey) { - pInfo->max = pCtx->param[2].dKey; + + if (pInfo->max < GET_DOUBLE_VAL(&(pCtx->preAggVals.statis.max))) { + pInfo->max = GET_DOUBLE_VAL(&(pCtx->preAggVals.statis.max)); } } - - goto _spread_over; + } else { + if (pInfo->min > pCtx->param[1].dKey) { + pInfo->min = pCtx->param[1].dKey; + } + + if 
(pInfo->max < pCtx->param[2].dKey) { + pInfo->max = pCtx->param[2].dKey; + } } - + void *pData = GET_INPUT_CHAR(pCtx); numOfElems = 0; - + if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, int8_t, pCtx->inputType, numOfElems); } else if (pCtx->inputType == TSDB_DATA_TYPE_SMALLINT) { @@ -3437,19 +3425,19 @@ static void spread_function(SQLFunctionCtx *pCtx) { } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { LIST_MINMAX_N(pCtx, pInfo->min, pInfo->max, pCtx->size, pData, float, pCtx->inputType, numOfElems); } - + if (!pCtx->hasNull) { assert(pCtx->size == numOfElems); } - -_spread_over: + + _spread_over: SET_VAL(pCtx, numOfElems, 1); - + if (numOfElems > 0) { pResInfo->hasResult = DATA_SET_FLAG; pInfo->hasResult = DATA_SET_FLAG; } - + // keep the data into the final output buffer for super table query since this execution may be the last one if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SSpreadInfo)); @@ -3461,12 +3449,12 @@ static void spread_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SET_VAL(pCtx, 1, 1); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); SSpreadInfo *pInfo = pResInfo->interResultBuf; - + double val = 0.0; if (pCtx->inputType == TSDB_DATA_TYPE_TINYINT) { val = GET_INT8_VAL(pData); @@ -3481,19 +3469,19 @@ static void spread_function_f(SQLFunctionCtx *pCtx, int32_t index) { } else if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT) { val = GET_FLOAT_VAL(pData); } - + // keep the result data in output buffer, not in the intermediate buffer if (val > pInfo->max) { pInfo->max = val; } - + if (val < pInfo->min) { pInfo->min = val; } - + pResInfo->hasResult = DATA_SET_FLAG; pInfo->hasResult = DATA_SET_FLAG; - + if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SSpreadInfo)); } @@ -3502,30 +3490,30 @@ static void spread_function_f(SQLFunctionCtx *pCtx, 
int32_t index) { void spread_func_merge(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ); - + SSpreadInfo *pResData = pResInfo->interResultBuf; - + int32_t notNullElems = 0; for (int32_t i = 0; i < pCtx->size; ++i) { SSpreadInfo *input = (SSpreadInfo *)GET_INPUT_CHAR_INDEX(pCtx, i); - + /* no assign tag, the value is null */ if (input->hasResult != DATA_SET_FLAG) { continue; } - + if (pResData->min > input->min) { pResData->min = input->min; } - + if (pResData->max < input->max) { pResData->max = input->max; } - + pResData->hasResult = DATA_SET_FLAG; notNullElems++; } - + if (notNullElems > 0) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SSpreadInfo)); pResInfo->hasResult = DATA_SET_FLAG; @@ -3541,15 +3529,15 @@ void spread_func_sec_merge(SQLFunctionCtx *pCtx) { if (pData->hasResult != DATA_SET_FLAG) { return; } - + if (pCtx->param[0].dKey > pData->min) { pCtx->param[0].dKey = pData->min; } - + if (pCtx->param[3].dKey < pData->max) { pCtx->param[3].dKey = pData->max; } - + GET_RES_INFO(pCtx)->hasResult = DATA_SET_FLAG; } @@ -3559,26 +3547,26 @@ void spread_function_finalizer(SQLFunctionCtx *pCtx) { * the type of intermediate data is binary */ SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + if (pCtx->currentStage == SECONDARY_STAGE_MERGE) { assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); - + if (pResInfo->hasResult != DATA_SET_FLAG) { setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); return; } - + *(double *)pCtx->aOutputBuf = pCtx->param[3].dKey - pCtx->param[0].dKey; } else { assert((pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_DOUBLE) || - (pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP)); - + (pCtx->inputType == TSDB_DATA_TYPE_TIMESTAMP)); + SSpreadInfo *pInfo = GET_RES_INFO(pCtx)->interResultBuf; if (pInfo->hasResult != DATA_SET_FLAG) { setNull(pCtx->aOutputBuf, pCtx->outputType, pCtx->outputBytes); return; } - + *(double *)pCtx->aOutputBuf = 
pInfo->max - pInfo->min; } @@ -3598,33 +3586,33 @@ void spread_function_finalizer(SQLFunctionCtx *pCtx) { */ int patternMatch(const char *patterStr, const char *str, size_t size, const SPatternCompareInfo *pInfo) { char c, c1; - + int32_t i = 0; int32_t j = 0; - + while ((c = patterStr[i++]) != 0) { if (c == pInfo->matchAll) { /* Match "*" */ - + while ((c = patterStr[i++]) == pInfo->matchAll || c == pInfo->matchOne) { if (c == pInfo->matchOne && (j > size || str[j++] == 0)) { // empty string, return not match return TSDB_PATTERN_NOWILDCARDMATCH; } } - + if (c == 0) { return TSDB_PATTERN_MATCH; /* "*" at the end of the pattern matches */ } - + char next[3] = {toupper(c), tolower(c), 0}; while (1) { size_t n = strcspn(str, next); str += n; - + if (str[0] == 0 || (n >= size - 1)) { break; } - + int32_t ret = patternMatch(&patterStr[i], ++str, size - n - 1, pInfo); if (ret != TSDB_PATTERN_NOMATCH) { return ret; @@ -3632,18 +3620,18 @@ int patternMatch(const char *patterStr, const char *str, size_t size, const SPat } return TSDB_PATTERN_NOWILDCARDMATCH; } - + c1 = str[j++]; - + if (j <= size) { if (c == c1 || tolower(c) == tolower(c1) || (c == pInfo->matchOne && c1 != 0)) { continue; } } - + return TSDB_PATTERN_NOMATCH; } - + return (str[j] == 0 || j >= size) ? 
TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } @@ -3651,13 +3639,13 @@ int WCSPatternMatch(const wchar_t *patterStr, const wchar_t *str, size_t size, c wchar_t c, c1; wchar_t matchOne = L'_'; // "_" wchar_t matchAll = L'%'; // "%" - + int32_t i = 0; int32_t j = 0; - + while ((c = patterStr[i++]) != 0) { if (c == matchAll) { /* Match "%" */ - + while ((c = patterStr[i++]) == matchAll || c == matchOne) { if (c == matchOne && (j > size || str[j++] == 0)) { return TSDB_PATTERN_NOWILDCARDMATCH; @@ -3666,38 +3654,38 @@ int WCSPatternMatch(const wchar_t *patterStr, const wchar_t *str, size_t size, c if (c == 0) { return TSDB_PATTERN_MATCH; } - + wchar_t accept[3] = {towupper(c), towlower(c), 0}; while (1) { size_t n = wcsspn(str, accept); - + str += n; if (str[0] == 0 || (n >= size - 1)) { break; } - + str++; - + int32_t ret = WCSPatternMatch(&patterStr[i], str, wcslen(str), pInfo); if (ret != TSDB_PATTERN_NOMATCH) { return ret; } } - + return TSDB_PATTERN_NOWILDCARDMATCH; } - + c1 = str[j++]; - + if (j <= size) { if (c == c1 || towlower(c) == towlower(c1) || (c == matchOne && c1 != 0)) { continue; } } - + return TSDB_PATTERN_NOMATCH; } - + return (str[j] == 0 || j >= size) ? 
TSDB_PATTERN_MATCH : TSDB_PATTERN_NOMATCH; } @@ -3707,29 +3695,29 @@ static void getStatics_i8(int64_t *primaryKey, int32_t type, int8_t *data, int32 *max = INT64_MIN; *minIndex = 0; *maxIndex = 0; - + assert(numOfRow <= INT16_MAX); - + // int64_t lastKey = 0; // int8_t lastVal = TSDB_DATA_TINYINT_NULL; - + for (int32_t i = 0; i < numOfRow; ++i) { if (isNull((char *)&data[i], type)) { (*numOfNull) += 1; continue; } - + *sum += data[i]; if (*min > data[i]) { *min = data[i]; *minIndex = i; } - + if (*max < data[i]) { *max = data[i]; *maxIndex = i; } - + // if (type != TSDB_DATA_TYPE_BOOL) { // ignore the bool data type pre-calculation // if (isNull((char *)&lastVal, type)) { // lastKey = primaryKey[i]; @@ -3749,29 +3737,29 @@ static void getStatics_i16(int64_t *primaryKey, int16_t *data, int32_t numOfRow, *max = INT64_MIN; *minIndex = 0; *maxIndex = 0; - + assert(numOfRow <= INT16_MAX); - + // int64_t lastKey = 0; // int16_t lastVal = TSDB_DATA_SMALLINT_NULL; - + for (int32_t i = 0; i < numOfRow; ++i) { if (isNull((const char*) &data[i], TSDB_DATA_TYPE_SMALLINT)) { (*numOfNull) += 1; continue; } - + *sum += data[i]; if (*min > data[i]) { *min = data[i]; *minIndex = i; } - + if (*max < data[i]) { *max = data[i]; *maxIndex = i; } - + // if (isNull(&lastVal, TSDB_DATA_TYPE_SMALLINT)) { // lastKey = primaryKey[i]; // lastVal = data[i]; @@ -3789,29 +3777,29 @@ static void getStatics_i32(int64_t *primaryKey, int32_t *data, int32_t numOfRow, *max = INT64_MIN; *minIndex = 0; *maxIndex = 0; - + assert(numOfRow <= INT16_MAX); - + // int64_t lastKey = 0; // int32_t lastVal = TSDB_DATA_INT_NULL; - + for (int32_t i = 0; i < numOfRow; ++i) { if (isNull((const char*) &data[i], TSDB_DATA_TYPE_INT)) { (*numOfNull) += 1; continue; } - + *sum += data[i]; if (*min > data[i]) { *min = data[i]; *minIndex = i; } - + if (*max < data[i]) { *max = data[i]; *maxIndex = i; } - + // if (isNull(&lastVal, TSDB_DATA_TYPE_INT)) { // lastKey = primaryKey[i]; // lastVal = data[i]; @@ -3829,26 +3817,26 
@@ static void getStatics_i64(int64_t *primaryKey, int64_t *data, int32_t numOfRow, *max = INT64_MIN; *minIndex = 0; *maxIndex = 0; - + assert(numOfRow <= INT16_MAX); - + for (int32_t i = 0; i < numOfRow; ++i) { if (isNull((const char*) &data[i], TSDB_DATA_TYPE_BIGINT)) { (*numOfNull) += 1; continue; } - + *sum += data[i]; if (*min > data[i]) { *min = data[i]; *minIndex = i; } - + if (*max < data[i]) { *max = data[i]; *maxIndex = i; } - + // if (isNull(&lastVal, TSDB_DATA_TYPE_BIGINT)) { // lastKey = primaryKey[i]; // lastVal = data[i]; @@ -3867,15 +3855,15 @@ static void getStatics_f(int64_t *primaryKey, float *data, int32_t numOfRow, dou double dsum = 0; *minIndex = 0; *maxIndex = 0; - + assert(numOfRow <= INT16_MAX); - + for (int32_t i = 0; i < numOfRow; ++i) { if (isNull((const char*) &data[i], TSDB_DATA_TYPE_FLOAT)) { (*numOfNull) += 1; continue; } - + float fv = 0; fv = GET_FLOAT_VAL(&(data[i])); dsum += fv; @@ -3883,12 +3871,12 @@ static void getStatics_f(int64_t *primaryKey, float *data, int32_t numOfRow, dou fmin = fv; *minIndex = i; } - + if (fmax < fv) { fmax = fv; *maxIndex = i; } - + // if (isNull(&lastVal, TSDB_DATA_TYPE_FLOAT)) { // lastKey = primaryKey[i]; // lastVal = data[i]; @@ -3898,7 +3886,7 @@ static void getStatics_f(int64_t *primaryKey, float *data, int32_t numOfRow, dou // lastVal = data[i]; // } } - + double csum = 0; csum = GET_DOUBLE_VAL(sum); csum += dsum; @@ -3920,15 +3908,15 @@ static void getStatics_d(int64_t *primaryKey, double *data, int32_t numOfRow, do double dsum = 0; *minIndex = 0; *maxIndex = 0; - + assert(numOfRow <= INT16_MAX); - + for (int32_t i = 0; i < numOfRow; ++i) { if (isNull((const char*) &data[i], TSDB_DATA_TYPE_DOUBLE)) { (*numOfNull) += 1; continue; } - + double dv = 0; dv = GET_DOUBLE_VAL(&(data[i])); dsum += dv; @@ -3936,12 +3924,12 @@ static void getStatics_d(int64_t *primaryKey, double *data, int32_t numOfRow, do dmin = dv; *minIndex = i; } - + if (dmax < dv) { dmax = dv; *maxIndex = i; } - + // if 
(isNull(&lastVal, TSDB_DATA_TYPE_DOUBLE)) { // lastKey = primaryKey[i]; // lastVal = data[i]; @@ -3951,20 +3939,20 @@ static void getStatics_d(int64_t *primaryKey, double *data, int32_t numOfRow, do // lastVal = data[i]; // } } - + double csum = 0; csum = GET_DOUBLE_VAL(sum); csum += dsum; #ifdef _TD_ARM_32_ - SET_DOUBLE_VAL_ALIGN(sum, &csum); + SET_DOUBLE_VAL_ALIGN(sum, &csum); SET_DOUBLE_VAL_ALIGN(max, &dmax); SET_DOUBLE_VAL_ALIGN(min, &dmin); #else - *sum = csum; - *max = dmax; - *min = dmin; + *sum = csum; + *max = dmax; + *min = dmin; #endif } @@ -4004,13 +3992,13 @@ static bool twa_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; } - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); //->aOutputBuf + pCtx->outputBytes; STwaInfo * pInfo = pResInfo->interResultBuf; - + pInfo->lastKey = INT64_MIN; pInfo->type = pCtx->inputType; - + return true; } @@ -4042,69 +4030,67 @@ static FORCE_INLINE void setTWALastVal(SQLFunctionCtx *pCtx, const char *data, i static void twa_function(SQLFunctionCtx *pCtx) { void * data = GET_INPUT_CHAR(pCtx); TSKEY *primaryKey = pCtx->ptsList; - - assert(IS_DATA_BLOCK_LOADED(pCtx->blockStatus)); - + int32_t notNullElems = 0; - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); STwaInfo * pInfo = pResInfo->interResultBuf; - + int32_t i = 0; - + // skip null value while (pCtx->hasNull && i < pCtx->size && isNull((char *)data + pCtx->inputBytes * i, pCtx->inputType)) { i++; } - + if (i >= pCtx->size) { return; } - + if (pInfo->lastKey == INT64_MIN) { pInfo->lastKey = pCtx->nStartQueryTimestamp; setTWALastVal(pCtx, data, i, pInfo); - + pInfo->hasResult = DATA_SET_FLAG; } - + notNullElems++; - + if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT || pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { pInfo->dOutput += pInfo->dLastValue * (primaryKey[i] - pInfo->lastKey); } else { pInfo->iOutput += pInfo->iLastValue * (primaryKey[i] - pInfo->lastKey); } - + pInfo->lastKey = primaryKey[i]; setTWALastVal(pCtx, data, i, pInfo); - + for 
(++i; i < pCtx->size; i++) { if (pCtx->hasNull && isNull((char *)data + pCtx->inputBytes * i, pCtx->inputType)) { continue; } - + notNullElems++; if (pCtx->inputType == TSDB_DATA_TYPE_FLOAT || pCtx->inputType == TSDB_DATA_TYPE_DOUBLE) { pInfo->dOutput += pInfo->dLastValue * (primaryKey[i] - pInfo->lastKey); } else { pInfo->iOutput += pInfo->iLastValue * (primaryKey[i] - pInfo->lastKey); } - + pInfo->lastKey = primaryKey[i]; setTWALastVal(pCtx, data, i, pInfo); } - + SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { pResInfo->hasResult = DATA_SET_FLAG; } - + if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pInfo, sizeof(STwaInfo)); } - + // pCtx->numOfIteratedElems += notNullElems; } @@ -4149,33 +4135,33 @@ static void twa_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void twa_func_merge(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ); - + STwaInfo *pBuf = (STwaInfo *)pCtx->aOutputBuf; char * indicator = pCtx->aInputElemBuf; - + int32_t numOfNotNull = 0; for (int32_t i = 0; i < pCtx->size; ++i, indicator += sizeof(STwaInfo)) { STwaInfo *pInput = (STwaInfo*) indicator; - + if (pInput->hasResult != DATA_SET_FLAG) { continue; } - + numOfNotNull++; if (pCtx->inputType >= TSDB_DATA_TYPE_TINYINT && pCtx->inputType <= TSDB_DATA_TYPE_BIGINT) { pBuf->iOutput += pInput->iOutput; } else { pBuf->dOutput += pInput->dOutput; } - + pBuf->SKey = pInput->SKey; pBuf->EKey = pInput->EKey; pBuf->lastKey = pInput->lastKey; pBuf->iLastValue = pInput->iLastValue; } - + SET_VAL(pCtx, numOfNotNull, 1); - + if (numOfNotNull > 0) { pBuf->hasResult = DATA_SET_FLAG; } @@ -4189,22 +4175,22 @@ static void twa_func_merge(SQLFunctionCtx *pCtx) { void twa_function_copy(SQLFunctionCtx *pCtx) { assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + memcpy(pResInfo->interResultBuf, pCtx->aInputElemBuf, (size_t)pCtx->inputBytes); pResInfo->hasResult = ((STwaInfo 
*)pCtx->aInputElemBuf)->hasResult; } void twa_function_finalizer(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + STwaInfo *pInfo = (STwaInfo *)pResInfo->interResultBuf; assert(pInfo->EKey >= pInfo->lastKey && pInfo->hasResult == pResInfo->hasResult); - + if (pInfo->hasResult != DATA_SET_FLAG) { setNull(pCtx->aOutputBuf, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); return; } - + if (pInfo->SKey == pInfo->EKey) { *(double *)pCtx->aOutputBuf = 0; } else if (pInfo->type >= TSDB_DATA_TYPE_TINYINT && pInfo->type <= TSDB_DATA_TYPE_BIGINT) { @@ -4214,7 +4200,7 @@ void twa_function_finalizer(SQLFunctionCtx *pCtx) { pInfo->dOutput += pInfo->dLastValue * (pInfo->EKey - pInfo->lastKey); *(double *)pCtx->aOutputBuf = pInfo->dOutput / (pInfo->EKey - pInfo->SKey); } - + GET_RES_INFO(pCtx)->numOfRes = 1; doFinalizer(pCtx); } @@ -4237,10 +4223,10 @@ static void interp_function(SQLFunctionCtx *pCtx) { * Note: the result of primary timestamp column uses the timestamp specified by user in the query sql */ assert(pCtx->param[3].i64Key == 2); - + SInterpInfo interpInfo = *(SInterpInfo *)pCtx->aOutputBuf; SInterpInfoDetail *pInfoDetail = interpInfo.pInterpDetail; - + /* set no output result */ if (pInfoDetail->type == TSDB_INTERPO_NONE) { pCtx->param[3].i64Key = 0; @@ -4254,33 +4240,33 @@ static void interp_function(SQLFunctionCtx *pCtx) { } else if (pInfoDetail->type == TSDB_INTERPO_PREV) { char *data = pCtx->param[1].pz; char *pVal = data + TSDB_KEYSIZE; - + if (pCtx->outputType == TSDB_DATA_TYPE_FLOAT) { float v = GET_DOUBLE_VAL(pVal); assignVal(pCtx->aOutputBuf, (const char*) &v, pCtx->outputBytes, pCtx->outputType); } else { assignVal(pCtx->aOutputBuf, pVal, pCtx->outputBytes, pCtx->outputType); } - + } else if (pInfoDetail->type == TSDB_INTERPO_LINEAR) { char *data1 = pCtx->param[1].pz; char *data2 = pCtx->param[2].pz; - + char *pVal1 = data1 + TSDB_KEYSIZE; char *pVal2 = data2 + TSDB_KEYSIZE; - + SPoint point1 = {.key = *(TSKEY *)data1, .val = 
&pCtx->param[1].i64Key}; SPoint point2 = {.key = *(TSKEY *)data2, .val = &pCtx->param[2].i64Key}; - + SPoint point = {.key = pInfoDetail->ts, .val = pCtx->aOutputBuf}; - + int32_t srcType = pCtx->inputType; if ((srcType >= TSDB_DATA_TYPE_TINYINT && srcType <= TSDB_DATA_TYPE_BIGINT) || srcType == TSDB_DATA_TYPE_TIMESTAMP || srcType == TSDB_DATA_TYPE_DOUBLE) { point1.val = pVal1; - + point2.val = pVal2; - + if (isNull(pVal1, srcType) || isNull(pVal2, srcType)) { setNull(pCtx->aOutputBuf, srcType, pCtx->inputBytes); } else { @@ -4289,30 +4275,30 @@ static void interp_function(SQLFunctionCtx *pCtx) { } else if (srcType == TSDB_DATA_TYPE_FLOAT) { float v1 = GET_DOUBLE_VAL(pVal1); float v2 = GET_DOUBLE_VAL(pVal2); - + point1.val = &v1; point2.val = &v2; - + if (isNull(pVal1, srcType) || isNull(pVal2, srcType)) { setNull(pCtx->aOutputBuf, srcType, pCtx->inputBytes); } else { taosDoLinearInterpolation(pCtx->outputType, &point1, &point2, &point); } - + } else { setNull(pCtx->aOutputBuf, srcType, pCtx->inputBytes); } } } - + free(interpInfo.pInterpDetail); } - + pCtx->size = pCtx->param[3].i64Key; - + tVariantDestroy(&pCtx->param[1]); tVariantDestroy(&pCtx->param[2]); - + // data in the check operation are all null, not output SET_VAL(pCtx, pCtx->size, 1); } @@ -4321,10 +4307,10 @@ static bool ts_comp_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; // not initialized since it has been initialized } - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); STSCompInfo *pInfo = pResInfo->interResultBuf; - + pInfo->pTSBuf = tsBufCreate(false); pInfo->pTSBuf->tsOrder = pCtx->order; return true; @@ -4333,9 +4319,9 @@ static bool ts_comp_function_setup(SQLFunctionCtx *pCtx) { static void ts_comp_function(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); STSBuf * pTSbuf = ((STSCompInfo *)(pResInfo->interResultBuf))->pTSBuf; - + const char *input = GET_INPUT_CHAR(pCtx); - + // primary ts must be existed, so no need to check its existance if 
(pCtx->order == TSQL_SO_ASC) { tsBufAppend(pTSbuf, 0, pCtx->tag.i64Key, input, pCtx->size * TSDB_KEYSIZE); @@ -4345,9 +4331,9 @@ static void ts_comp_function(SQLFunctionCtx *pCtx) { tsBufAppend(pTSbuf, 0, pCtx->tag.i64Key, d, TSDB_KEYSIZE); } } - + SET_VAL(pCtx, pCtx->size, 1); - + pResInfo->hasResult = DATA_SET_FLAG; } @@ -4356,27 +4342,27 @@ static void ts_comp_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); STSCompInfo *pInfo = pResInfo->interResultBuf; - + STSBuf *pTSbuf = pInfo->pTSBuf; - + tsBufAppend(pTSbuf, 0, pCtx->tag.i64Key, pData, TSDB_KEYSIZE); SET_VAL(pCtx, pCtx->size, 1); - + pResInfo->hasResult = DATA_SET_FLAG; } static void ts_comp_finalize(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); - + STSCompInfo *pInfo = pResInfo->interResultBuf; STSBuf * pTSbuf = pInfo->pTSBuf; - + tsBufFlush(pTSbuf); strcpy(pCtx->aOutputBuf, pTSbuf->path); - + tsBufDestory(pTSbuf); doFinalizer(pCtx); } @@ -4390,7 +4376,7 @@ static double do_calc_rate(const SRateInfo* pRateInfo) { } int64_t diff = 0; - + if (pRateInfo->isIRate) { diff = pRateInfo->lastValue; if (diff >= pRateInfo->firstValue) { @@ -4402,14 +4388,14 @@ static double do_calc_rate(const SRateInfo* pRateInfo) { return 0; } } - + int64_t duration = pRateInfo->lastKey - pRateInfo->firstKey; duration = (duration + 500) / 1000; - + double resultVal = ((double)diff) / duration; - - pTrace("do_calc_rate() isIRate:%d firstKey:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64 " resultVal:%f", - pRateInfo->isIRate, pRateInfo->firstKey, pRateInfo->lastKey, pRateInfo->firstValue, pRateInfo->lastValue, pRateInfo->CorrectionValue, resultVal); + + pTrace("do_calc_rate() isIRate:%d firstKey:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64 " resultVal:%f", + pRateInfo->isIRate, 
pRateInfo->firstKey, pRateInfo->lastKey, pRateInfo->firstValue, pRateInfo->lastValue, pRateInfo->CorrectionValue, resultVal); return resultVal; } @@ -4419,10 +4405,10 @@ static bool rate_function_setup(SQLFunctionCtx *pCtx) { if (!function_setup(pCtx)) { return false; } - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); //->aOutputBuf + pCtx->outputBytes; SRateInfo * pInfo = pResInfo->interResultBuf; - + pInfo->CorrectionValue = 0; pInfo->firstKey = INT64_MIN; pInfo->lastKey = INT64_MIN; @@ -4438,23 +4424,21 @@ static bool rate_function_setup(SQLFunctionCtx *pCtx) { static void rate_function(SQLFunctionCtx *pCtx) { - - assert(IS_DATA_BLOCK_LOADED(pCtx->blockStatus)); - + int32_t notNullElems = 0; SResultInfo *pResInfo = GET_RES_INFO(pCtx); SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; TSKEY *primaryKey = pCtx->ptsList; - + pTrace("%p rate_function() size:%d, hasNull:%d", pCtx, pCtx->size, pCtx->hasNull); - + for (int32_t i = 0; i < pCtx->size; ++i) { char *pData = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { pTrace("%p rate_function() index of null data:%d", pCtx, i); continue; } - + notNullElems++; int64_t v = 0; @@ -4474,37 +4458,37 @@ static void rate_function(SQLFunctionCtx *pCtx) { default: assert(0); } - + if ((INT64_MIN == pRateInfo->firstValue) || (INT64_MIN == pRateInfo->firstKey)) { pRateInfo->firstValue = v; pRateInfo->firstKey = primaryKey[i]; - + pTrace("firstValue:%" PRId64 " firstKey:%" PRId64, pRateInfo->firstValue, pRateInfo->firstKey); } - + if (INT64_MIN == pRateInfo->lastValue) { pRateInfo->lastValue = v; } else if (v < pRateInfo->lastValue) { pRateInfo->CorrectionValue += pRateInfo->lastValue; pTrace("CorrectionValue:%" PRId64, pRateInfo->CorrectionValue); } - + pRateInfo->lastValue = v; pRateInfo->lastKey = primaryKey[i]; pTrace("lastValue:%" PRId64 " lastKey:%" PRId64, pRateInfo->lastValue, pRateInfo->lastKey); } - + if (!pCtx->hasNull) { assert(pCtx->size == notNullElems); } - + 
SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { pRateInfo->hasResult = DATA_SET_FLAG; pResInfo->hasResult = DATA_SET_FLAG; } - + // keep the data into the final output buffer for super table query since this execution may be the last one if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SRateInfo)); @@ -4516,12 +4500,12 @@ static void rate_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + // NOTE: keep the intermediate result into the interResultBuf SResultInfo *pResInfo = GET_RES_INFO(pCtx); SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; TSKEY *primaryKey = pCtx->ptsList; - + int64_t v = 0; switch (pCtx->inputType) { case TSDB_DATA_TYPE_TINYINT: @@ -4543,25 +4527,25 @@ static void rate_function_f(SQLFunctionCtx *pCtx, int32_t index) { if ((INT64_MIN == pRateInfo->firstValue) || (INT64_MIN == pRateInfo->firstKey)) { pRateInfo->firstValue = v; pRateInfo->firstKey = primaryKey[index]; - } + } if (INT64_MIN == pRateInfo->lastValue) { pRateInfo->lastValue = v; } else if (v < pRateInfo->lastValue) { pRateInfo->CorrectionValue += pRateInfo->lastValue; } - + pRateInfo->lastValue = v; pRateInfo->lastKey = primaryKey[index]; - + pTrace("====%p rate_function_f() index:%d lastValue:%" PRId64 " lastKey:%" PRId64 " CorrectionValue:%" PRId64, pCtx, index, pRateInfo->lastValue, pRateInfo->lastKey, pRateInfo->CorrectionValue); - + SET_VAL(pCtx, 1, 1); - + // set has result flag pRateInfo->hasResult = DATA_SET_FLAG; pResInfo->hasResult = DATA_SET_FLAG; - + // keep the data into the final output buffer for super table query since this execution may be the last one if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SRateInfo)); @@ -4573,30 +4557,30 @@ static void rate_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void rate_func_merge(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); 
assert(pResInfo->superTableQ); - + pTrace("rate_func_merge() size:%d", pCtx->size); - + //SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; SRateInfo *pBuf = (SRateInfo *)pCtx->aOutputBuf; char *indicator = pCtx->aInputElemBuf; - + assert(1 == pCtx->size); - + int32_t numOfNotNull = 0; for (int32_t i = 0; i < pCtx->size; ++i, indicator += sizeof(SRateInfo)) { SRateInfo *pInput = (SRateInfo *)indicator; if (DATA_SET_FLAG != pInput->hasResult) { continue; } - + numOfNotNull++; memcpy(pBuf, pInput, sizeof(SRateInfo)); - pTrace("%p rate_func_merge() isIRate:%d firstKey:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64, - pCtx, pInput->isIRate, pInput->firstKey, pInput->lastKey, pInput->firstValue, pInput->lastValue, pInput->CorrectionValue); + pTrace("%p rate_func_merge() isIRate:%d firstKey:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64, + pCtx, pInput->isIRate, pInput->firstKey, pInput->lastKey, pInput->firstValue, pInput->lastValue, pInput->CorrectionValue); } - + SET_VAL(pCtx, numOfNotNull, 1); - + if (numOfNotNull > 0) { pBuf->hasResult = DATA_SET_FLAG; } @@ -4608,14 +4592,14 @@ static void rate_func_merge(SQLFunctionCtx *pCtx) { static void rate_func_copy(SQLFunctionCtx *pCtx) { assert(pCtx->inputType == TSDB_DATA_TYPE_BINARY); - + SResultInfo *pResInfo = GET_RES_INFO(pCtx); memcpy(pResInfo->interResultBuf, pCtx->aInputElemBuf, (size_t)pCtx->inputBytes); pResInfo->hasResult = ((SRateInfo*)pCtx->aInputElemBuf)->hasResult; - + SRateInfo* pRateInfo = (SRateInfo*)pCtx->aInputElemBuf; - pTrace("%p rate_func_second_merge() firstKey:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64 " hasResult:%d", - pCtx, pRateInfo->firstKey, pRateInfo->lastKey, pRateInfo->firstValue, pRateInfo->lastValue, pRateInfo->CorrectionValue, pRateInfo->hasResult); + pTrace("%p rate_func_second_merge() firstKey:%" PRId64 " 
lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64 " hasResult:%d", + pCtx, pRateInfo->firstKey, pRateInfo->lastKey, pRateInfo->firstValue, pRateInfo->lastValue, pRateInfo->CorrectionValue, pRateInfo->hasResult); } @@ -4624,9 +4608,9 @@ static void rate_finalizer(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; - pTrace("%p isIRate:%d firstKey:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64 " hasResult:%d", - pCtx, pRateInfo->isIRate, pRateInfo->firstKey, pRateInfo->lastKey, pRateInfo->firstValue, pRateInfo->lastValue, pRateInfo->CorrectionValue, pRateInfo->hasResult); - + pTrace("%p isIRate:%d firstKey:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " lastValue:%" PRId64 " CorrectionValue:%" PRId64 " hasResult:%d", + pCtx, pRateInfo->isIRate, pRateInfo->firstKey, pRateInfo->lastKey, pRateInfo->firstValue, pRateInfo->lastValue, pRateInfo->CorrectionValue, pRateInfo->hasResult); + if (pRateInfo->hasResult != DATA_SET_FLAG) { setNull(pCtx->aOutputBuf, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); return; @@ -4635,37 +4619,35 @@ static void rate_finalizer(SQLFunctionCtx *pCtx) { *(double*)pCtx->aOutputBuf = do_calc_rate(pRateInfo); pTrace("rate_finalizer() output result:%f", *(double *)pCtx->aOutputBuf); - + // cannot set the numOfIteratedElems again since it is set during previous iteration pResInfo->numOfRes = 1; pResInfo->hasResult = DATA_SET_FLAG; - + doFinalizer(pCtx); } static void irate_function(SQLFunctionCtx *pCtx) { - - assert(IS_DATA_BLOCK_LOADED(pCtx->blockStatus)); - + int32_t notNullElems = 0; SResultInfo *pResInfo = GET_RES_INFO(pCtx); SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; TSKEY *primaryKey = pCtx->ptsList; - + pTrace("%p irate_function() size:%d, hasNull:%d", pCtx, pCtx->size, pCtx->hasNull); - + if (pCtx->size < 1) { return; } - + for (int32_t i = 
pCtx->size - 1; i >= 0; --i) { char *pData = GET_INPUT_CHAR_INDEX(pCtx, i); if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { pTrace("%p irate_function() index of null data:%d", pCtx, i); continue; } - + notNullElems++; int64_t v = 0; @@ -4685,32 +4667,32 @@ static void irate_function(SQLFunctionCtx *pCtx) { default: assert(0); } - + // TODO: calc once if only call this function once ???? if ((INT64_MIN == pRateInfo->lastKey) || (INT64_MIN == pRateInfo->lastValue)) { pRateInfo->lastValue = v; pRateInfo->lastKey = primaryKey[i]; - + pTrace("%p irate_function() lastValue:%" PRId64 " lastKey:%" PRId64, pCtx, pRateInfo->lastValue, pRateInfo->lastKey); continue; } - + if ((INT64_MIN == pRateInfo->firstKey) || (INT64_MIN == pRateInfo->firstValue)){ pRateInfo->firstValue = v; pRateInfo->firstKey = primaryKey[i]; - + pTrace("%p irate_function() firstValue:%" PRId64 " firstKey:%" PRId64, pCtx, pRateInfo->firstValue, pRateInfo->firstKey); break; } } - + SET_VAL(pCtx, notNullElems, 1); - + if (notNullElems > 0) { pRateInfo->hasResult = DATA_SET_FLAG; pResInfo->hasResult = DATA_SET_FLAG; } - + // keep the data into the final output buffer for super table query since this execution may be the last one if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SRateInfo)); @@ -4722,12 +4704,12 @@ static void irate_function_f(SQLFunctionCtx *pCtx, int32_t index) { if (pCtx->hasNull && isNull(pData, pCtx->inputType)) { return; } - + // NOTE: keep the intermediate result into the interResultBuf SResultInfo *pResInfo = GET_RES_INFO(pCtx); SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; TSKEY *primaryKey = pCtx->ptsList; - + int64_t v = 0; switch (pCtx->inputType) { case TSDB_DATA_TYPE_TINYINT: @@ -4745,21 +4727,21 @@ static void irate_function_f(SQLFunctionCtx *pCtx, int32_t index) { default: assert(0); } - + pRateInfo->firstKey = pRateInfo->lastKey; pRateInfo->firstValue = pRateInfo->lastValue; pRateInfo->lastValue = v; 
pRateInfo->lastKey = primaryKey[index]; - + pTrace("====%p irate_function_f() index:%d lastValue:%" PRId64 " lastKey:%" PRId64 " firstValue:%" PRId64 " firstKey:%" PRId64, pCtx, index, pRateInfo->lastValue, pRateInfo->lastKey, pRateInfo->firstValue , pRateInfo->firstKey); - + SET_VAL(pCtx, 1, 1); - + // set has result flag pRateInfo->hasResult = DATA_SET_FLAG; pResInfo->hasResult = DATA_SET_FLAG; - + // keep the data into the final output buffer for super table query since this execution may be the last one if (pResInfo->superTableQ) { memcpy(pCtx->aOutputBuf, pResInfo->interResultBuf, sizeof(SRateInfo)); @@ -4769,15 +4751,15 @@ static void irate_function_f(SQLFunctionCtx *pCtx, int32_t index) { static void do_sumrate_merge(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); assert(pResInfo->superTableQ); - + SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; char * input = GET_INPUT_CHAR(pCtx); - + for (int32_t i = 0; i < pCtx->size; ++i, input += pCtx->inputBytes) { SRateInfo *pInput = (SRateInfo *)input; pTrace("%p do_sumrate_merge() hasResult:%d input num:%" PRId64 " input sum:%f total num:%" PRId64 " total sum:%f", pCtx, pInput->hasResult, pInput->num, pInput->sum, pRateInfo->num, pRateInfo->sum); - + if (pInput->hasResult != DATA_SET_FLAG) { continue; } else if (pInput->num == 0) { @@ -4789,7 +4771,7 @@ static void do_sumrate_merge(SQLFunctionCtx *pCtx) { } pRateInfo->hasResult = DATA_SET_FLAG; } - + // if the data set hasResult is not set, the result is null if (DATA_SET_FLAG == pRateInfo->hasResult) { pResInfo->hasResult = DATA_SET_FLAG; @@ -4811,23 +4793,23 @@ static void sumrate_func_second_merge(SQLFunctionCtx *pCtx) { static void sumrate_finalizer(SQLFunctionCtx *pCtx) { SResultInfo *pResInfo = GET_RES_INFO(pCtx); SRateInfo *pRateInfo = (SRateInfo *)pResInfo->interResultBuf; - + pTrace("%p sumrate_finalizer() superTableQ:%d num:%" PRId64 " sum:%f hasResult:%d", pCtx, pResInfo->superTableQ, pRateInfo->num, pRateInfo->sum, 
pRateInfo->hasResult); - + if (pRateInfo->hasResult != DATA_SET_FLAG) { setNull(pCtx->aOutputBuf, TSDB_DATA_TYPE_DOUBLE, sizeof(double)); return; } - + if (pRateInfo->num == 0) { - // from meter - *(double*)pCtx->aOutputBuf = do_calc_rate(pRateInfo); + // from meter + *(double*)pCtx->aOutputBuf = do_calc_rate(pRateInfo); } else if (pCtx->functionId == TSDB_FUNC_SUM_RATE || pCtx->functionId == TSDB_FUNC_SUM_IRATE) { - *(double*)pCtx->aOutputBuf = pRateInfo->sum; + *(double*)pCtx->aOutputBuf = pRateInfo->sum; } else { - *(double*)pCtx->aOutputBuf = pRateInfo->sum / pRateInfo->num; + *(double*)pCtx->aOutputBuf = pRateInfo->sum / pRateInfo->num; } - + pResInfo->numOfRes = 1; pResInfo->hasResult = DATA_SET_FLAG; doFinalizer(pCtx); @@ -4849,527 +4831,527 @@ static void sumrate_finalizer(SQLFunctionCtx *pCtx) { * */ int32_t funcCompatDefList[] = { - // count, sum, avg, min, max, stddev, percentile, apercentile, first, last - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - // last_row, top, bottom, spread, twa, leastsqr, ts, ts_dummy, tag_dummy, ts_z - 4, -1, -1, 1, 1, 1, 1, 1, 1, -1, - // tag, colprj, tagprj, arithmetic, diff, first_dist, last_dist, interp rate irate - 1, 1, 1, 1, -1, 1, 1, 5, 1, 1, - // sum_rate, sum_irate, avg_rate, avg_irate - 1, 1, 1, 1, + // count, sum, avg, min, max, stddev, percentile, apercentile, first, last + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + // last_row, top, bottom, spread, twa, leastsqr, ts, ts_dummy, tag_dummy, ts_z + 4, -1, -1, 1, 1, 1, 1, 1, 1, -1, + // tag, colprj, tagprj, arithmetic, diff, first_dist, last_dist, interp rate irate + 1, 1, 1, 1, -1, 1, 1, 5, 1, 1, + // sum_rate, sum_irate, avg_rate, avg_irate + 1, 1, 1, 1, }; SQLAggFuncElem aAggs[] = {{ - // 0, count function does not invoke the finalize function - "count", - TSDB_FUNC_COUNT, - TSDB_FUNC_COUNT, - TSDB_BASE_FUNC_SO, - function_setup, - count_function, - count_function_f, - no_next_step, - doFinalizer, - count_func_merge, - count_func_merge, - count_load_data_info, - }, - { - // 1 - "sum", - 
TSDB_FUNC_SUM, - TSDB_FUNC_SUM, - TSDB_BASE_FUNC_SO, - function_setup, - sum_function, - sum_function_f, - no_next_step, - function_finalizer, - sum_func_merge, - sum_func_second_merge, - precal_req_load_info, - }, - { - // 2 - "avg", - TSDB_FUNC_AVG, - TSDB_FUNC_AVG, - TSDB_BASE_FUNC_SO, - function_setup, - avg_function, - avg_function_f, - no_next_step, - avg_finalizer, - avg_func_merge, - avg_func_second_merge, - precal_req_load_info, - }, - { - // 3 - "min", - TSDB_FUNC_MIN, - TSDB_FUNC_MIN, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_SELECTIVITY, - min_func_setup, - min_function, - min_function_f, - no_next_step, - function_finalizer, - min_func_merge, - min_func_second_merge, - precal_req_load_info, - }, - { - // 4 - "max", - TSDB_FUNC_MAX, - TSDB_FUNC_MAX, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_SELECTIVITY, - max_func_setup, - max_function, - max_function_f, - no_next_step, - function_finalizer, - max_func_merge, - max_func_second_merge, - precal_req_load_info, - }, - { - // 5 - "stddev", - TSDB_FUNC_STDDEV, - TSDB_FUNC_INVALID_ID, - TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF, - function_setup, - stddev_function, - stddev_function_f, - stddev_next_step, - stddev_finalizer, - noop1, - noop1, - data_req_load_info, - }, - { - // 6 - "percentile", - TSDB_FUNC_PERCT, - TSDB_FUNC_INVALID_ID, - TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF, - percentile_function_setup, - percentile_function, - percentile_function_f, - no_next_step, - percentile_finalizer, - noop1, - noop1, - data_req_load_info, - }, - { - // 7 - "apercentile", - TSDB_FUNC_APERCT, - TSDB_FUNC_APERCT, - TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_METRIC, - apercentile_function_setup, - apercentile_function, - apercentile_function_f, - no_next_step, - apercentile_finalizer, - apercentile_func_merge, - apercentile_func_second_merge, - data_req_load_info, - }, - { - // 8 - "first", - TSDB_FUNC_FIRST, - TSDB_FUNC_FIRST_DST, - TSDB_BASE_FUNC_SO 
| TSDB_FUNCSTATE_SELECTIVITY, - function_setup, - first_function, - first_function_f, - no_next_step, - function_finalizer, - noop1, - noop1, - first_data_req_info, - }, - { - // 9 - "last", - TSDB_FUNC_LAST, - TSDB_FUNC_LAST_DST, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_SELECTIVITY, - function_setup, - last_function, - last_function_f, - no_next_step, - function_finalizer, - noop1, - noop1, - last_data_req_info, - }, - { - // 10 - "last_row", - TSDB_FUNC_LAST_ROW, - TSDB_FUNC_LAST_ROW, - TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_NEED_TS | - TSDB_FUNCSTATE_SELECTIVITY, - first_last_function_setup, - last_row_function, - noop2, - no_next_step, - last_row_finalizer, - noop1, - last_dist_func_second_merge, - data_req_load_info, - }, - { - // 11 - "top", - TSDB_FUNC_TOP, - TSDB_FUNC_TOP, - TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_NEED_TS | - TSDB_FUNCSTATE_SELECTIVITY, - top_bottom_function_setup, - top_function, - top_function_f, - no_next_step, - top_bottom_func_finalizer, - top_func_merge, - top_func_second_merge, - data_req_load_info, - }, - { - // 12 - "bottom", - TSDB_FUNC_BOTTOM, - TSDB_FUNC_BOTTOM, - TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_NEED_TS | - TSDB_FUNCSTATE_SELECTIVITY, - top_bottom_function_setup, - bottom_function, - bottom_function_f, - no_next_step, - top_bottom_func_finalizer, - bottom_func_merge, - bottom_func_second_merge, - data_req_load_info, - }, - { - // 13 - "spread", - TSDB_FUNC_SPREAD, - TSDB_FUNC_SPREAD, - TSDB_BASE_FUNC_SO, - spread_function_setup, - spread_function, - spread_function_f, - no_next_step, - spread_function_finalizer, - spread_func_merge, - spread_func_sec_merge, - count_load_data_info, - }, - { - // 14 - "twa", - TSDB_FUNC_TWA, - TSDB_FUNC_TWA, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - twa_function_setup, - twa_function, - twa_function_f, - no_next_step, - twa_function_finalizer, - twa_func_merge, - 
twa_function_copy, - data_req_load_info, - }, - { - // 15 - "leastsquares", - TSDB_FUNC_LEASTSQR, - TSDB_FUNC_INVALID_ID, - TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF, - leastsquares_function_setup, - leastsquares_function, - leastsquares_function_f, - no_next_step, - leastsquares_finalizer, - noop1, - noop1, - data_req_load_info, - }, - { - // 16 - "ts", - TSDB_FUNC_TS, - TSDB_FUNC_TS, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - function_setup, - date_col_output_function, - date_col_output_function_f, - no_next_step, - doFinalizer, - copy_function, - copy_function, - no_data_info, - }, - { - // 17 - "ts", - TSDB_FUNC_TS_DUMMY, - TSDB_FUNC_TS_DUMMY, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - function_setup, - noop1, - noop2, - no_next_step, - doFinalizer, - copy_function, - copy_function, - data_req_load_info, - }, - { - // 18 - "tag", - TSDB_FUNC_TAG_DUMMY, - TSDB_FUNC_TAG_DUMMY, - TSDB_BASE_FUNC_SO, - function_setup, - tag_function, - noop2, - no_next_step, - doFinalizer, - copy_function, - copy_function, - no_data_info, - }, - { - // 19 - "ts", - TSDB_FUNC_TS_COMP, - TSDB_FUNC_TS_COMP, - TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_NEED_TS, - ts_comp_function_setup, - ts_comp_function, - ts_comp_function_f, - no_next_step, - ts_comp_finalize, - copy_function, - copy_function, - data_req_load_info, - }, - { - // 20 - "tag", - TSDB_FUNC_TAG, - TSDB_FUNC_TAG, - TSDB_BASE_FUNC_SO, - function_setup, - tag_function, - tag_function_f, - no_next_step, - doFinalizer, - copy_function, - copy_function, - no_data_info, - }, - { - // 21, column project sql function - "colprj", - TSDB_FUNC_PRJ, - TSDB_FUNC_PRJ, - TSDB_BASE_FUNC_MO | TSDB_FUNCSTATE_NEED_TS, - function_setup, - col_project_function, - col_project_function_f, - no_next_step, - doFinalizer, - copy_function, - copy_function, - data_req_load_info, - }, - { - // 22, multi-output, tag function has only one result - "tagprj", - TSDB_FUNC_TAGPRJ, - TSDB_FUNC_TAGPRJ, - TSDB_BASE_FUNC_MO, - 
function_setup, - tag_project_function, - tag_project_function_f, - no_next_step, - doFinalizer, - copy_function, - copy_function, - no_data_info, - }, - { - // 23 - "arithmetic", - TSDB_FUNC_ARITHM, - TSDB_FUNC_ARITHM, - TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_NEED_TS, - function_setup, - arithmetic_function, - arithmetic_function_f, - no_next_step, - doFinalizer, - copy_function, - copy_function, - data_req_load_info, - }, - { - // 24 - "diff", - TSDB_FUNC_DIFF, - TSDB_FUNC_INVALID_ID, - TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_NEED_TS, - diff_function_setup, - diff_function, - diff_function_f, - no_next_step, - doFinalizer, - noop1, - noop1, - data_req_load_info, - }, - // distributed version used in two-stage aggregation processes - { - // 25 - "first_dist", - TSDB_FUNC_FIRST_DST, - TSDB_FUNC_FIRST_DST, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS | TSDB_FUNCSTATE_SELECTIVITY, - first_last_function_setup, - first_dist_function, - first_dist_function_f, - no_next_step, - function_finalizer, - first_dist_func_merge, - first_dist_func_second_merge, - first_dist_data_req_info, - }, - { - // 26 - "last_dist", - TSDB_FUNC_LAST_DST, - TSDB_FUNC_LAST_DST, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS | TSDB_FUNCSTATE_SELECTIVITY, - first_last_function_setup, - last_dist_function, - last_dist_function_f, - no_next_step, - function_finalizer, - last_dist_func_merge, - last_dist_func_second_merge, - last_dist_data_req_info, - }, - { - // 27 - "interp", - TSDB_FUNC_INTERP, - TSDB_FUNC_INTERP, - TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_NEED_TS, - function_setup, - interp_function, - do_sum_f, // todo filter handle - no_next_step, - doFinalizer, - noop1, - copy_function, - no_data_info, - }, - { - // 28 - "rate", - TSDB_FUNC_RATE, - TSDB_FUNC_RATE, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - rate_function_setup, - rate_function, - rate_function_f, - no_next_step, - rate_finalizer, - rate_func_merge, - rate_func_copy, - 
data_req_load_info, - }, - { - // 29 - "irate", - TSDB_FUNC_IRATE, - TSDB_FUNC_IRATE, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - rate_function_setup, - irate_function, - irate_function_f, - no_next_step, - rate_finalizer, - rate_func_merge, - rate_func_copy, - data_req_load_info, - }, - { - // 30 - "sum_rate", - TSDB_FUNC_SUM_RATE, - TSDB_FUNC_SUM_RATE, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - rate_function_setup, - rate_function, - rate_function_f, - no_next_step, - sumrate_finalizer, - sumrate_func_merge, - sumrate_func_second_merge, - data_req_load_info, - }, - { - // 31 - "sum_irate", - TSDB_FUNC_SUM_IRATE, - TSDB_FUNC_SUM_IRATE, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - rate_function_setup, - irate_function, - irate_function_f, - no_next_step, - sumrate_finalizer, - sumrate_func_merge, - sumrate_func_second_merge, - data_req_load_info, - }, - { - // 32 - "avg_rate", - TSDB_FUNC_AVG_RATE, - TSDB_FUNC_AVG_RATE, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - rate_function_setup, - rate_function, - rate_function_f, - no_next_step, - sumrate_finalizer, - sumrate_func_merge, - sumrate_func_second_merge, - data_req_load_info, - }, - { - // 33 - "avg_irate", - TSDB_FUNC_AVG_IRATE, - TSDB_FUNC_AVG_IRATE, - TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, - rate_function_setup, - irate_function, - irate_function_f, - no_next_step, - sumrate_finalizer, - sumrate_func_merge, - sumrate_func_second_merge, - data_req_load_info, - }}; + // 0, count function does not invoke the finalize function + "count", + TSDB_FUNC_COUNT, + TSDB_FUNC_COUNT, + TSDB_BASE_FUNC_SO, + function_setup, + count_function, + count_function_f, + no_next_step, + doFinalizer, + count_func_merge, + count_func_merge, + count_load_data_info, + }, + { + // 1 + "sum", + TSDB_FUNC_SUM, + TSDB_FUNC_SUM, + TSDB_BASE_FUNC_SO, + function_setup, + sum_function, + sum_function_f, + no_next_step, + function_finalizer, + sum_func_merge, + sum_func_second_merge, + precal_req_load_info, + }, + { + // 2 + 
"avg", + TSDB_FUNC_AVG, + TSDB_FUNC_AVG, + TSDB_BASE_FUNC_SO, + function_setup, + avg_function, + avg_function_f, + no_next_step, + avg_finalizer, + avg_func_merge, + avg_func_second_merge, + precal_req_load_info, + }, + { + // 3 + "min", + TSDB_FUNC_MIN, + TSDB_FUNC_MIN, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_SELECTIVITY, + min_func_setup, + min_function, + min_function_f, + no_next_step, + function_finalizer, + min_func_merge, + min_func_second_merge, + precal_req_load_info, + }, + { + // 4 + "max", + TSDB_FUNC_MAX, + TSDB_FUNC_MAX, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_SELECTIVITY, + max_func_setup, + max_function, + max_function_f, + no_next_step, + function_finalizer, + max_func_merge, + max_func_second_merge, + precal_req_load_info, + }, + { + // 5 + "stddev", + TSDB_FUNC_STDDEV, + TSDB_FUNC_INVALID_ID, + TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF, + function_setup, + stddev_function, + stddev_function_f, + stddev_next_step, + stddev_finalizer, + noop1, + noop1, + data_req_load_info, + }, + { + // 6 + "percentile", + TSDB_FUNC_PERCT, + TSDB_FUNC_INVALID_ID, + TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF, + percentile_function_setup, + percentile_function, + percentile_function_f, + no_next_step, + percentile_finalizer, + noop1, + noop1, + data_req_load_info, + }, + { + // 7 + "apercentile", + TSDB_FUNC_APERCT, + TSDB_FUNC_APERCT, + TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_METRIC, + apercentile_function_setup, + apercentile_function, + apercentile_function_f, + no_next_step, + apercentile_finalizer, + apercentile_func_merge, + apercentile_func_second_merge, + data_req_load_info, + }, + { + // 8 + "first", + TSDB_FUNC_FIRST, + TSDB_FUNC_FIRST_DST, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_SELECTIVITY, + function_setup, + first_function, + first_function_f, + no_next_step, + function_finalizer, + noop1, + noop1, + first_data_req_info, + }, + { + // 9 + "last", + TSDB_FUNC_LAST, + 
TSDB_FUNC_LAST_DST, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_SELECTIVITY, + function_setup, + last_function, + last_function_f, + no_next_step, + function_finalizer, + noop1, + noop1, + last_data_req_info, + }, + { + // 10 + "last_row", + TSDB_FUNC_LAST_ROW, + TSDB_FUNC_LAST_ROW, + TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_NEED_TS | + TSDB_FUNCSTATE_SELECTIVITY, + first_last_function_setup, + last_row_function, + noop2, + no_next_step, + last_row_finalizer, + noop1, + last_dist_func_second_merge, + data_req_load_info, + }, + { + // 11 + "top", + TSDB_FUNC_TOP, + TSDB_FUNC_TOP, + TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_NEED_TS | + TSDB_FUNCSTATE_SELECTIVITY, + top_bottom_function_setup, + top_function, + top_function_f, + no_next_step, + top_bottom_func_finalizer, + top_func_merge, + top_func_second_merge, + data_req_load_info, + }, + { + // 12 + "bottom", + TSDB_FUNC_BOTTOM, + TSDB_FUNC_BOTTOM, + TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_NEED_TS | + TSDB_FUNCSTATE_SELECTIVITY, + top_bottom_function_setup, + bottom_function, + bottom_function_f, + no_next_step, + top_bottom_func_finalizer, + bottom_func_merge, + bottom_func_second_merge, + data_req_load_info, + }, + { + // 13 + "spread", + TSDB_FUNC_SPREAD, + TSDB_FUNC_SPREAD, + TSDB_BASE_FUNC_SO, + spread_function_setup, + spread_function, + spread_function_f, + no_next_step, + spread_function_finalizer, + spread_func_merge, + spread_func_sec_merge, + count_load_data_info, + }, + { + // 14 + "twa", + TSDB_FUNC_TWA, + TSDB_FUNC_TWA, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + twa_function_setup, + twa_function, + twa_function_f, + no_next_step, + twa_function_finalizer, + twa_func_merge, + twa_function_copy, + data_req_load_info, + }, + { + // 15 + "leastsquares", + TSDB_FUNC_LEASTSQR, + TSDB_FUNC_INVALID_ID, + TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_STREAM | TSDB_FUNCSTATE_OF, + leastsquares_function_setup, + 
leastsquares_function, + leastsquares_function_f, + no_next_step, + leastsquares_finalizer, + noop1, + noop1, + data_req_load_info, + }, + { + // 16 + "ts", + TSDB_FUNC_TS, + TSDB_FUNC_TS, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + function_setup, + date_col_output_function, + date_col_output_function_f, + no_next_step, + doFinalizer, + copy_function, + copy_function, + no_data_info, + }, + { + // 17 + "ts", + TSDB_FUNC_TS_DUMMY, + TSDB_FUNC_TS_DUMMY, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + function_setup, + noop1, + noop2, + no_next_step, + doFinalizer, + copy_function, + copy_function, + data_req_load_info, + }, + { + // 18 + "tag", + TSDB_FUNC_TAG_DUMMY, + TSDB_FUNC_TAG_DUMMY, + TSDB_BASE_FUNC_SO, + function_setup, + tag_function, + noop2, + no_next_step, + doFinalizer, + copy_function, + copy_function, + no_data_info, + }, + { + // 19 + "ts", + TSDB_FUNC_TS_COMP, + TSDB_FUNC_TS_COMP, + TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_NEED_TS, + ts_comp_function_setup, + ts_comp_function, + ts_comp_function_f, + no_next_step, + ts_comp_finalize, + copy_function, + copy_function, + data_req_load_info, + }, + { + // 20 + "tag", + TSDB_FUNC_TAG, + TSDB_FUNC_TAG, + TSDB_BASE_FUNC_SO, + function_setup, + tag_function, + tag_function_f, + no_next_step, + doFinalizer, + copy_function, + copy_function, + no_data_info, + }, + { + // 21, column project sql function + "colprj", + TSDB_FUNC_PRJ, + TSDB_FUNC_PRJ, + TSDB_BASE_FUNC_MO | TSDB_FUNCSTATE_NEED_TS, + function_setup, + col_project_function, + col_project_function_f, + no_next_step, + doFinalizer, + copy_function, + copy_function, + data_req_load_info, + }, + { + // 22, multi-output, tag function has only one result + "tagprj", + TSDB_FUNC_TAGPRJ, + TSDB_FUNC_TAGPRJ, + TSDB_BASE_FUNC_MO, + function_setup, + tag_project_function, + tag_project_function_f, + no_next_step, + doFinalizer, + copy_function, + copy_function, + no_data_info, + }, + { + // 23 + "arithmetic", + TSDB_FUNC_ARITHM, + TSDB_FUNC_ARITHM, + 
TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_NEED_TS, + function_setup, + arithmetic_function, + arithmetic_function_f, + no_next_step, + doFinalizer, + copy_function, + copy_function, + data_req_load_info, + }, + { + // 24 + "diff", + TSDB_FUNC_DIFF, + TSDB_FUNC_INVALID_ID, + TSDB_FUNCSTATE_MO | TSDB_FUNCSTATE_NEED_TS, + diff_function_setup, + diff_function, + diff_function_f, + no_next_step, + doFinalizer, + noop1, + noop1, + data_req_load_info, + }, + // distributed version used in two-stage aggregation processes + { + // 25 + "first_dist", + TSDB_FUNC_FIRST_DST, + TSDB_FUNC_FIRST_DST, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS | TSDB_FUNCSTATE_SELECTIVITY, + first_last_function_setup, + first_dist_function, + first_dist_function_f, + no_next_step, + function_finalizer, + first_dist_func_merge, + first_dist_func_second_merge, + first_dist_data_req_info, + }, + { + // 26 + "last_dist", + TSDB_FUNC_LAST_DST, + TSDB_FUNC_LAST_DST, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS | TSDB_FUNCSTATE_SELECTIVITY, + first_last_function_setup, + last_dist_function, + last_dist_function_f, + no_next_step, + function_finalizer, + last_dist_func_merge, + last_dist_func_second_merge, + last_dist_data_req_info, + }, + { + // 27 + "interp", + TSDB_FUNC_INTERP, + TSDB_FUNC_INTERP, + TSDB_FUNCSTATE_SO | TSDB_FUNCSTATE_OF | TSDB_FUNCSTATE_METRIC | TSDB_FUNCSTATE_NEED_TS, + function_setup, + interp_function, + do_sum_f, // todo filter handle + no_next_step, + doFinalizer, + noop1, + copy_function, + no_data_info, + }, + { + // 28 + "rate", + TSDB_FUNC_RATE, + TSDB_FUNC_RATE, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + rate_function_setup, + rate_function, + rate_function_f, + no_next_step, + rate_finalizer, + rate_func_merge, + rate_func_copy, + data_req_load_info, + }, + { + // 29 + "irate", + TSDB_FUNC_IRATE, + TSDB_FUNC_IRATE, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + rate_function_setup, + irate_function, + irate_function_f, + no_next_step, + 
rate_finalizer, + rate_func_merge, + rate_func_copy, + data_req_load_info, + }, + { + // 30 + "sum_rate", + TSDB_FUNC_SUM_RATE, + TSDB_FUNC_SUM_RATE, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + rate_function_setup, + rate_function, + rate_function_f, + no_next_step, + sumrate_finalizer, + sumrate_func_merge, + sumrate_func_second_merge, + data_req_load_info, + }, + { + // 31 + "sum_irate", + TSDB_FUNC_SUM_IRATE, + TSDB_FUNC_SUM_IRATE, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + rate_function_setup, + irate_function, + irate_function_f, + no_next_step, + sumrate_finalizer, + sumrate_func_merge, + sumrate_func_second_merge, + data_req_load_info, + }, + { + // 32 + "avg_rate", + TSDB_FUNC_AVG_RATE, + TSDB_FUNC_AVG_RATE, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + rate_function_setup, + rate_function, + rate_function_f, + no_next_step, + sumrate_finalizer, + sumrate_func_merge, + sumrate_func_second_merge, + data_req_load_info, + }, + { + // 33 + "avg_irate", + TSDB_FUNC_AVG_IRATE, + TSDB_FUNC_AVG_IRATE, + TSDB_BASE_FUNC_SO | TSDB_FUNCSTATE_NEED_TS, + rate_function_setup, + irate_function, + irate_function_f, + no_next_step, + sumrate_finalizer, + sumrate_func_merge, + sumrate_func_second_merge, + data_req_load_info, + }}; diff --git a/src/client/src/tscJoinProcess.c b/src/client/src/tscJoinProcess.c deleted file mode 100644 index afd8e98edac4522089e0aaafa02237da67bdd1cc..0000000000000000000000000000000000000000 --- a/src/client/src/tscJoinProcess.c +++ /dev/null @@ -1,1731 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
- * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#include "os.h" -#include "tscJoinProcess.h" -#include "tcache.h" -#include "tscUtil.h" -#include "tsclient.h" -#include "tscompression.h" -#include "ttime.h" -#include "tutil.h" - -static void freeSubqueryObj(SSqlObj* pSql); - -static bool doCompare(int32_t order, int64_t left, int64_t right) { - if (order == TSQL_SO_ASC) { - return left < right; - } else { - return left > right; - } -} - -static int64_t doTSBlockIntersect(SSqlObj* pSql, SJoinSubquerySupporter* pSupporter1, - SJoinSubquerySupporter* pSupporter2, TSKEY* st, TSKEY* et) { - STSBuf* output1 = tsBufCreate(true); - STSBuf* output2 = tsBufCreate(true); - - *st = INT64_MAX; - *et = INT64_MIN; - - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, pSql->cmd.clauseIndex); - - SLimitVal* pLimit = &pQueryInfo->limit; - int32_t order = pQueryInfo->order.order; - - SQueryInfo* pSubQueryInfo1 = tscGetQueryInfoDetail(&pSql->pSubs[0]->cmd, 0); - SQueryInfo* pSubQueryInfo2 = tscGetQueryInfoDetail(&pSql->pSubs[1]->cmd, 0); - - pSubQueryInfo1->tsBuf = output1; - pSubQueryInfo2->tsBuf = output2; - - tsBufResetPos(pSupporter1->pTSBuf); - tsBufResetPos(pSupporter2->pTSBuf); - - // TODO add more details information - if (!tsBufNextPos(pSupporter1->pTSBuf)) { - tsBufFlush(output1); - tsBufFlush(output2); - - tscTrace("%p input1 is empty, 0 for secondary query after ts blocks intersecting", pSql); - return 0; - } - - if (!tsBufNextPos(pSupporter2->pTSBuf)) { - tsBufFlush(output1); - tsBufFlush(output2); - - tscTrace("%p input2 is empty, 0 for secondary query after ts blocks intersecting", pSql); - return 0; - } - - int64_t numOfInput1 = 1; - int64_t numOfInput2 = 1; - - while (1) { - STSElem elem1 = tsBufGetElem(pSupporter1->pTSBuf); - STSElem elem2 = tsBufGetElem(pSupporter2->pTSBuf); - -#ifdef _DEBUG_VIEW - // for debug purpose - tscPrint("%" PRId64 ", tags:%d \t %" PRId64 ", 
tags:%d", elem1.ts, elem1.tag, elem2.ts, elem2.tag); -#endif - - if (elem1.tag < elem2.tag || (elem1.tag == elem2.tag && doCompare(order, elem1.ts, elem2.ts))) { - if (!tsBufNextPos(pSupporter1->pTSBuf)) { - break; - } - - numOfInput1++; - } else if (elem1.tag > elem2.tag || (elem1.tag == elem2.tag && doCompare(order, elem2.ts, elem1.ts))) { - if (!tsBufNextPos(pSupporter2->pTSBuf)) { - break; - } - - numOfInput2++; - } else { - /* - * in case of stable query, limit/offset is not applied here. the limit/offset is applied to the - * final results which is acquired after the secondry merge of in the client. - */ - if (pLimit->offset == 0 || pQueryInfo->intervalTime > 0 || QUERY_IS_STABLE_QUERY(pQueryInfo->type)) { - if (*st > elem1.ts) { - *st = elem1.ts; - } - - if (*et < elem1.ts) { - *et = elem1.ts; - } - - tsBufAppend(output1, elem1.vnode, elem1.tag, (const char*)&elem1.ts, sizeof(elem1.ts)); - tsBufAppend(output2, elem2.vnode, elem2.tag, (const char*)&elem2.ts, sizeof(elem2.ts)); - } else { - pLimit->offset -= 1; - } - - if (!tsBufNextPos(pSupporter1->pTSBuf)) { - break; - } - - numOfInput1++; - - if (!tsBufNextPos(pSupporter2->pTSBuf)) { - break; - } - - numOfInput2++; - } - } - - /* - * failed to set the correct ts order yet in two cases: - * 1. only one element - * 2. only one element for each tag. 
- */ - if (output1->tsOrder == -1) { - output1->tsOrder = TSQL_SO_ASC; - output2->tsOrder = TSQL_SO_ASC; - } - - tsBufFlush(output1); - tsBufFlush(output2); - - tsBufDestory(pSupporter1->pTSBuf); - tsBufDestory(pSupporter2->pTSBuf); - - tscTrace("%p input1:%" PRId64 ", input2:%" PRId64 ", final:%" PRId64 " for secondary query after ts blocks " - "intersecting, skey:%" PRId64 ", ekey:%" PRId64, pSql, - numOfInput1, numOfInput2, output1->numOfTotal, *st, *et); - - return output1->numOfTotal; -} - -// todo handle failed to create sub query -SJoinSubquerySupporter* tscCreateJoinSupporter(SSqlObj* pSql, SSubqueryState* pState, int32_t index) { - SJoinSubquerySupporter* pSupporter = calloc(1, sizeof(SJoinSubquerySupporter)); - if (pSupporter == NULL) { - return NULL; - } - - pSupporter->pObj = pSql; - pSupporter->pState = pState; - - pSupporter->subqueryIndex = index; - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, pSql->cmd.clauseIndex); - - pSupporter->interval = pQueryInfo->intervalTime; - pSupporter->limit = pQueryInfo->limit; - - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, pSql->cmd.clauseIndex, index); - pSupporter->uid = pMeterMetaInfo->pMeterMeta->uid; - - assert (pSupporter->uid != 0); - - getTmpfilePath("join-", pSupporter->path); - pSupporter->f = fopen(pSupporter->path, "w"); - - if (pSupporter->f == NULL) { - tscError("%p failed to create tmp file:%s, reason:%s", pSql, pSupporter->path, strerror(errno)); - } - - return pSupporter; -} - -void tscDestroyJoinSupporter(SJoinSubquerySupporter* pSupporter) { - if (pSupporter == NULL) { - return; - } - - tscSqlExprInfoDestroy(&pSupporter->exprsInfo); - tscColumnBaseInfoDestroy(&pSupporter->colList); - - tscClearFieldInfo(&pSupporter->fieldsInfo); - - if (pSupporter->f != NULL) { - fclose(pSupporter->f); - unlink(pSupporter->path); - } - - tscTagCondRelease(&pSupporter->tagCond); - free(pSupporter); -} - -/* - * need the secondary query process - * In case of 
count(ts)/count(*)/spread(ts) query, that are only applied to - * primary timestamp column , the secondary query is not necessary - * - */ -bool needSecondaryQuery(SQueryInfo* pQueryInfo) { - for (int32_t i = 0; i < pQueryInfo->colList.numOfCols; ++i) { - SColumnBase* pBase = tscColumnBaseInfoGet(&pQueryInfo->colList, i); - if (pBase->colIndex.columnIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) { - return true; - } - } - - return false; -} - -/* - * launch secondary stage query to fetch the result that contains timestamp in set - */ -int32_t tscLaunchSecondPhaseSubqueries(SSqlObj* pSql) { - int32_t numOfSub = 0; - SJoinSubquerySupporter* pSupporter = NULL; - - /* - * If the columns are not involved in the final select clause, the secondary query will not be launched - * for the subquery. - */ - SSubqueryState* pState = NULL; - - for (int32_t i = 0; i < pSql->numOfSubs; ++i) { - pSupporter = pSql->pSubs[i]->param; - if (pSupporter->exprsInfo.numOfExprs > 0) { - ++numOfSub; - } - } - - assert(numOfSub > 0); - - // scan all subquery, if one sub query has only ts, ignore it - tscTrace("%p start to launch secondary subqueries, total:%d, only:%d needs to query, others are not retrieve in " - "select clause", pSql, pSql->numOfSubs, numOfSub); - - /* - * the subqueries that do not actually launch the secondary query to virtual node is set as completed. 
- */ - pState = pSupporter->pState; - pState->numOfTotal = pSql->numOfSubs; - pState->numOfCompleted = (pSql->numOfSubs - numOfSub); - - bool success = true; - - for (int32_t i = 0; i < pSql->numOfSubs; ++i) { - SSqlObj *pPrevSub = pSql->pSubs[i]; - pSql->pSubs[i] = NULL; - - pSupporter = pPrevSub->param; - - if (pSupporter->exprsInfo.numOfExprs == 0) { - tscTrace("%p subIndex: %d, not need to launch query, ignore it", pSql, i); - - tscDestroyJoinSupporter(pSupporter); - tscFreeSqlObj(pPrevSub); - - pSql->pSubs[i] = NULL; - continue; - } - - SQueryInfo *pSubQueryInfo = tscGetQueryInfoDetail(&pPrevSub->cmd, 0); - STSBuf *pTSBuf = pSubQueryInfo->tsBuf; - pSubQueryInfo->tsBuf = NULL; - - // free result for async object will also free sqlObj - assert(pSubQueryInfo->exprsInfo.numOfExprs == 1); // ts_comp query only requires one resutl columns - taos_free_result(pPrevSub); - - SSqlObj *pNew = createSubqueryObj(pSql, (int16_t) i, tscJoinQueryCallback, pSupporter, NULL); - if (pNew == NULL) { - tscDestroyJoinSupporter(pSupporter); - success = false; - break; - } - - tscClearSubqueryInfo(&pNew->cmd); - pSql->pSubs[i] = pNew; - - SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); - pQueryInfo->tsBuf = pTSBuf; // transfer the ownership of timestamp comp-z data to the new created object - - // set the second stage sub query for join process - pQueryInfo->type |= TSDB_QUERY_TYPE_JOIN_SEC_STAGE; - - pQueryInfo->intervalTime = pSupporter->interval; - pQueryInfo->groupbyExpr = pSupporter->groupbyExpr; - - tscColumnBaseInfoCopy(&pQueryInfo->colList, &pSupporter->colList, 0); - tscTagCondCopy(&pQueryInfo->tagCond, &pSupporter->tagCond); - - tscSqlExprCopy(&pQueryInfo->exprsInfo, &pSupporter->exprsInfo, pSupporter->uid, false); - tscFieldInfoCopyAll(&pQueryInfo->fieldsInfo, &pSupporter->fieldsInfo); - - pSupporter->exprsInfo.numOfExprs = 0; - pSupporter->fieldsInfo.numOfOutputCols = 0; - - /* - * if the first column of the secondary query is not ts function, add this 
function. - * Because this column is required to filter with timestamp after intersecting. - */ - if (pSupporter->exprsInfo.pExprs[0]->functionId != TSDB_FUNC_TS) { - tscAddTimestampColumn(pQueryInfo, TSDB_FUNC_TS, 0); - } - - SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); - assert(pNew->numOfSubs == 0 && pNew->cmd.numOfClause == 1 && pNewQueryInfo->numOfTables == 1); - - tscFieldInfoCalOffset(pNewQueryInfo); - - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pNewQueryInfo, 0); - - /* - * When handling the projection query, the offset value will be modified for table-table join, which is changed - * during the timestamp intersection. - */ - pSupporter->limit = pQueryInfo->limit; - pNewQueryInfo->limit = pSupporter->limit; - - // fetch the join tag column - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { - SSqlExpr *pExpr = tscSqlExprGet(pNewQueryInfo, 0); - assert(pQueryInfo->tagCond.joinInfo.hasJoin); - - int16_t tagColIndex = tscGetJoinTagColIndexByUid(&pQueryInfo->tagCond, pMeterMetaInfo->pMeterMeta->uid); - pExpr->param[0].i64Key = tagColIndex; - pExpr->numOfParams = 1; - } - - tscPrintSelectClause(pNew, 0); - - tscTrace("%p subquery:%p tableIndex:%d, vnodeIdx:%d, type:%d, exprInfo:%d, colList:%d, fieldsInfo:%d, name:%s", - pSql, pNew, 0, pMeterMetaInfo->vnodeIndex, pNewQueryInfo->type, - pNewQueryInfo->exprsInfo.numOfExprs, pNewQueryInfo->colList.numOfCols, - pNewQueryInfo->fieldsInfo.numOfOutputCols, pNewQueryInfo->pMeterInfo[0]->name); - } - - //prepare the subqueries object failed, abort - if (!success) { - pSql->res.code = TSDB_CODE_CLI_OUT_OF_MEMORY; - tscError("%p failed to prepare subqueries objs for secondary phase query, numOfSub:%d, code:%d", pSql, - pSql->numOfSubs, pSql->res.code); - freeSubqueryObj(pSql); - - return pSql->res.code; - } - - for(int32_t i = 0; i < pSql->numOfSubs; ++i) { - SSqlObj* pSub = pSql->pSubs[i]; - if (pSub == NULL) { - continue; - } - - tscProcessSql(pSub); - } - - return 
TSDB_CODE_SUCCESS; -} - -static void freeSubqueryObj(SSqlObj* pSql) { - SSubqueryState* pState = NULL; - - for (int32_t i = 0; i < pSql->numOfSubs; ++i) { - if (pSql->pSubs[i] != NULL) { - SJoinSubquerySupporter* p = pSql->pSubs[i]->param; - pState = p->pState; - - tscDestroyJoinSupporter(p); - - if (pSql->pSubs[i]->res.code == TSDB_CODE_SUCCESS) { - taos_free_result(pSql->pSubs[i]); - } - } - } - - tfree(pState); - pSql->numOfSubs = 0; -} - -static void doQuitSubquery(SSqlObj* pParentSql) { - freeSubqueryObj(pParentSql); - - tsem_wait(&pParentSql->emptyRspSem); - tsem_wait(&pParentSql->emptyRspSem); - - tsem_post(&pParentSql->rspSem); -} - -static void quitAllSubquery(SSqlObj* pSqlObj, SJoinSubquerySupporter* pSupporter) { - int32_t numOfTotal = pSupporter->pState->numOfTotal; - int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); - - if (finished >= numOfTotal) { - pSqlObj->res.code = abs(pSupporter->pState->code); - tscError("%p all subquery return and query failed, global code:%d", pSqlObj, pSqlObj->res.code); - - doQuitSubquery(pSqlObj); - } -} - -// update the query time range according to the join results on timestamp -static void updateQueryTimeRange(SQueryInfo* pQueryInfo, int64_t st, int64_t et) { - assert(pQueryInfo->stime <= st && pQueryInfo->etime >= et); - - pQueryInfo->stime = st; - pQueryInfo->etime = et; -} - -static void joinRetrieveCallback(void* param, TAOS_RES* tres, int numOfRows) { - SJoinSubquerySupporter* pSupporter = (SJoinSubquerySupporter*)param; - SSqlObj* pParentSql = pSupporter->pObj; - - SSqlObj* pSql = (SSqlObj*)tres; - SSqlCmd* pCmd = &pSql->cmd; - SSqlRes* pRes = &pSql->res; - - SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - - if ((pQueryInfo->type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) == 0) { - if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { - tscError("%p abort query due to other subquery failure. 
code:%d, global code:%d", pSql, numOfRows, - pSupporter->pState->code); - - quitAllSubquery(pParentSql, pSupporter); - return; - } - - if (numOfRows > 0) { // write the data into disk - fwrite(pSql->res.data, pSql->res.numOfRows, 1, pSupporter->f); - fclose(pSupporter->f); - - STSBuf* pBuf = tsBufCreateFromFile(pSupporter->path, true); - if (pBuf == NULL) { - tscError("%p invalid ts comp file from vnode, abort sub query, file size:%d", pSql, numOfRows); - - pSupporter->pState->code = TSDB_CODE_APP_ERROR; // todo set the informative code - quitAllSubquery(pParentSql, pSupporter); - return; - } - - if (pSupporter->pTSBuf == NULL) { - tscTrace("%p create tmp file for ts block:%s", pSql, pBuf->path); - pSupporter->pTSBuf = pBuf; - } else { - assert(pQueryInfo->numOfTables == 1); // for subquery, only one metermetaInfo - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - - tsBufMerge(pSupporter->pTSBuf, pBuf, pMeterMetaInfo->vnodeIndex); - tsBufDestory(pBuf); - } - - // open new file to save the result - getTmpfilePath("ts-join", pSupporter->path); - pSupporter->f = fopen(pSupporter->path, "w"); - pSql->res.row = pSql->res.numOfRows; - - taos_fetch_rows_a(tres, joinRetrieveCallback, param); - } else if (numOfRows == 0) { // no data from this vnode anymore - SQueryInfo* pParentQueryInfo = tscGetQueryInfoDetail(&pParentSql->cmd, pParentSql->cmd.clauseIndex); - - //todo refactor - if (tscNonOrderedProjectionQueryOnSTable(pParentQueryInfo, 0)) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - assert(pQueryInfo->numOfTables == 1); - - // for projection query, need to try next vnode - int32_t totalVnode = pMeterMetaInfo->pMetricMeta->numOfVnodes; - if ((++pMeterMetaInfo->vnodeIndex) < totalVnode) { - tscTrace("%p current vnode:%d exhausted, try next:%d. total vnode:%d. 
current numOfRes:%d", pSql, - pMeterMetaInfo->vnodeIndex - 1, pMeterMetaInfo->vnodeIndex, totalVnode, pRes->numOfTotal); - - pSql->cmd.command = TSDB_SQL_SELECT; - pSql->fp = tscJoinQueryCallback; - tscProcessSql(pSql); - - return; - } - } - - int32_t numOfTotal = pSupporter->pState->numOfTotal; - int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); - - if (finished >= numOfTotal) { - assert(finished == numOfTotal); - - if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { - tscTrace("%p sub:%p, numOfSub:%d, quit from further procedure due to other queries failure", pParentSql, tres, - pSupporter->subqueryIndex); - doQuitSubquery(pParentSql); - return; - } - - tscTrace("%p all subqueries retrieve ts complete, do ts block intersect", pParentSql); - - SJoinSubquerySupporter* p1 = pParentSql->pSubs[0]->param; - SJoinSubquerySupporter* p2 = pParentSql->pSubs[1]->param; - - TSKEY st, et; - - int64_t num = doTSBlockIntersect(pParentSql, p1, p2, &st, &et); - if (num <= 0) { // no result during ts intersect - tscTrace("%p free all sub SqlObj and quit", pParentSql); - doQuitSubquery(pParentSql); - } else { - updateQueryTimeRange(pParentQueryInfo, st, et); - tscLaunchSecondPhaseSubqueries(pParentSql); - } - } - } else { // failure of sub query - tscError("%p sub query failed, code:%d, index:%d", pSql, numOfRows, pSupporter->subqueryIndex); - pSupporter->pState->code = numOfRows; - - quitAllSubquery(pParentSql, pSupporter); - return; - } - - } else { // secondary stage retrieve, driven by taos_fetch_row or other functions - if (numOfRows < 0) { - pSupporter->pState->code = numOfRows; - tscError("%p retrieve failed, code:%d, index:%d", pSql, numOfRows, pSupporter->subqueryIndex); - } - - if (numOfRows >= 0) { - pSql->res.numOfTotal += pSql->res.numOfRows; - } - - if (tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0) && numOfRows == 0) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - 
assert(pQueryInfo->numOfTables == 1); - - // for projection query, need to try next vnode if current vnode is exhausted - if ((++pMeterMetaInfo->vnodeIndex) < pMeterMetaInfo->pMetricMeta->numOfVnodes) { - pSupporter->pState->numOfCompleted = 0; - pSupporter->pState->numOfTotal = 1; - - pSql->cmd.command = TSDB_SQL_SELECT; - pSql->fp = tscJoinQueryCallback; - tscProcessSql(pSql); - - return; - } - } - - int32_t numOfTotal = pSupporter->pState->numOfTotal; - int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); - - if (finished >= numOfTotal) { - assert(finished == numOfTotal); - tscTrace("%p all %d secondary subquery retrieves completed, global code:%d", tres, numOfTotal, - pParentSql->res.code); - - if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { - pParentSql->res.code = abs(pSupporter->pState->code); - freeSubqueryObj(pParentSql); - } - - tsem_post(&pParentSql->rspSem); - } else { - tscTrace("%p sub:%p completed, completed:%d, total:%d", pParentSql, tres, finished, numOfTotal); - } - } -} - -static SJoinSubquerySupporter* tscUpdateSubqueryStatus(SSqlObj* pSql, int32_t numOfFetch) { - int32_t notInvolved = 0; - SJoinSubquerySupporter* pSupporter = NULL; - SSubqueryState* pState = NULL; - - for(int32_t i = 0; i < pSql->numOfSubs; ++i) { - if (pSql->pSubs[i] == NULL) { - notInvolved++; - } else { - pSupporter = (SJoinSubquerySupporter*)pSql->pSubs[i]->param; - pState = pSupporter->pState; - } - } - - pState->numOfTotal = pSql->numOfSubs; - pState->numOfCompleted = pSql->numOfSubs - numOfFetch; - - return pSupporter; -} - -void tscFetchDatablockFromSubquery(SSqlObj* pSql) { - int32_t numOfFetch = 0; - assert(pSql->numOfSubs >= 1); - - for (int32_t i = 0; i < pSql->numOfSubs; ++i) { - if (pSql->pSubs[i] == NULL) { // this subquery does not need to involve in secondary query - continue; - } - - SSqlRes *pRes = &pSql->pSubs[i]->res; - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->pSubs[i]->cmd, 0); - - SMeterMetaInfo *pMeterMetaInfo 
= tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - - if (tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { - if (pRes->row >= pRes->numOfRows && pMeterMetaInfo->vnodeIndex < pMeterMetaInfo->pMetricMeta->numOfVnodes && - (!tscHasReachLimitation(pQueryInfo, pRes))) { - numOfFetch++; - } - } else { - if (pRes->row >= pRes->numOfRows && (!tscHasReachLimitation(pQueryInfo, pRes))) { - numOfFetch++; - } - } - } - - if (numOfFetch <= 0) { - return; - } - - // TODO multi-vnode retrieve for projection query with limitation has bugs, since the global limiation is not handled - tscTrace("%p retrieve data from %d subqueries", pSql, numOfFetch); - - SJoinSubquerySupporter* pSupporter = tscUpdateSubqueryStatus(pSql, numOfFetch); - - for (int32_t i = 0; i < pSql->numOfSubs; ++i) { - SSqlObj* pSql1 = pSql->pSubs[i]; - if (pSql1 == NULL) { - continue; - } - - SSqlRes* pRes1 = &pSql1->res; - SSqlCmd* pCmd1 = &pSql1->cmd; - - pSupporter = (SJoinSubquerySupporter*)pSql1->param; - - // wait for all subqueries completed - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd1, 0); - assert(pRes1->numOfRows >= 0 && pQueryInfo->numOfTables == 1); - - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - - if (pRes1->row >= pRes1->numOfRows) { - tscTrace("%p subquery:%p retrieve data from vnode, subquery:%d, vnodeIndex:%d", pSql, pSql1, - pSupporter->subqueryIndex, pMeterMetaInfo->vnodeIndex); - - tscResetForNextRetrieve(pRes1); - pSql1->fp = joinRetrieveCallback; - - if (pCmd1->command < TSDB_SQL_LOCAL) { - pCmd1->command = (pCmd1->command > TSDB_SQL_MGMT) ? 
TSDB_SQL_RETRIEVE : TSDB_SQL_FETCH; - } - - tscProcessSql(pSql1); - } - } - - // wait for all subquery completed - tsem_wait(&pSql->rspSem); - - // update the records for each subquery - for(int32_t i = 0; i < pSql->numOfSubs; ++i) { - if (pSql->pSubs[i] == NULL) { - continue; - } - - SSqlRes* pRes1 = &pSql->pSubs[i]->res; - pRes1->numOfTotalInCurrentClause += pRes1->numOfRows; - } -} - -// all subqueries return, set the result output index -void tscSetupOutputColumnIndex(SSqlObj* pSql) { - SSqlCmd* pCmd = &pSql->cmd; - SSqlRes* pRes = &pSql->res; - - tscTrace("%p all subquery response, retrieve data", pSql); - - if (pRes->pColumnIndex != NULL) { - return; // the column transfer support struct has been built - } - - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - pRes->pColumnIndex = calloc(1, sizeof(SColumnIndex) * pQueryInfo->fieldsInfo.numOfOutputCols); - - for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { - SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i); - - int32_t tableIndexOfSub = -1; - for (int32_t j = 0; j < pQueryInfo->numOfTables; ++j) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, j); - if (pMeterMetaInfo->pMeterMeta->uid == pExpr->uid) { - tableIndexOfSub = j; - break; - } - } - - assert(tableIndexOfSub >= 0 && tableIndexOfSub < pQueryInfo->numOfTables); - - SSqlCmd* pSubCmd = &pSql->pSubs[tableIndexOfSub]->cmd; - SQueryInfo* pSubQueryInfo = tscGetQueryInfoDetail(pSubCmd, 0); - - for (int32_t k = 0; k < pSubQueryInfo->exprsInfo.numOfExprs; ++k) { - SSqlExpr* pSubExpr = tscSqlExprGet(pSubQueryInfo, k); - if (pExpr->functionId == pSubExpr->functionId && pExpr->colInfo.colId == pSubExpr->colInfo.colId) { - pRes->pColumnIndex[i] = (SColumnIndex){.tableIndex = tableIndexOfSub, .columnIndex = k}; - break; - } - } - } -} - -void tscJoinQueryCallback(void* param, TAOS_RES* tres, int code) { - SSqlObj* pSql = (SSqlObj*)tres; - // SMeterMetaInfo *pMeterMetaInfo = 
tscGetMeterMetaInfo(&pSql->cmd, 0, 0); - - // int32_t idx = pSql->cmd.vnodeIdx; - - // SVnodeSidList *vnodeInfo = NULL; - // if (pMeterMetaInfo->pMetricMeta != NULL) { - // vnodeInfo = tscGetVnodeSidList(pMeterMetaInfo->pMetricMeta, idx - 1); - // } - - SJoinSubquerySupporter* pSupporter = (SJoinSubquerySupporter*)param; - - // if (atomic_add_fetch_32(pSupporter->numOfComplete, 1) >= - // pSupporter->numOfTotal) { - // SSqlObj *pParentObj = pSupporter->pObj; - // - // if ((pSql->cmd.type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) != 1) { - // int32_t num = 0; - // tscFetchDatablockFromSubquery(pParentObj); - // TSKEY* ts = tscGetQualifiedTSList(pParentObj, &num); - // - // if (num <= 0) { - // // no qualified result - // } - // - // tscLaunchSecondPhaseSubqueries(pSql, ts, num); - // } else { - - // } - // } else { - SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - if ((pQueryInfo->type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) != TSDB_QUERY_TYPE_JOIN_SEC_STAGE) { - if (code != TSDB_CODE_SUCCESS) { // direct call joinRetrieveCallback and set the error code - joinRetrieveCallback(param, pSql, code); - } else { // first stage query, continue to retrieve data - pSql->fp = joinRetrieveCallback; - pSql->cmd.command = TSDB_SQL_FETCH; - tscProcessSql(pSql); - } - - } else { // second stage join subquery - SSqlObj* pParentSql = pSupporter->pObj; - - if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { - tscError("%p abort query due to other subquery failure. 
code:%d, global code:%d", pSql, code, - pSupporter->pState->code); - quitAllSubquery(pParentSql, pSupporter); - - return; - } - - if (code != TSDB_CODE_SUCCESS) { - tscError("%p sub query failed, code:%d, set global code:%d, index:%d", pSql, code, code, - pSupporter->subqueryIndex); - pSupporter->pState->code = code; // todo set the informative code - - quitAllSubquery(pParentSql, pSupporter); - } else { - int32_t numOfTotal = pSupporter->pState->numOfTotal; - int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); - - if (finished >= numOfTotal) { - assert(finished == numOfTotal); - - tscSetupOutputColumnIndex(pParentSql); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - - /** - * if the query is a continue query (vnodeIndex > 0 for projection query) for next vnode, do the retrieval of - * data instead of returning to its invoker - */ - if (pMeterMetaInfo->vnodeIndex > 0 && tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { - assert(pMeterMetaInfo->vnodeIndex < pMeterMetaInfo->pMetricMeta->numOfVnodes); - pSupporter->pState->numOfCompleted = 0; // reset the record value - - pSql->fp = joinRetrieveCallback; // continue retrieve data - pSql->cmd.command = TSDB_SQL_FETCH; - tscProcessSql(pSql); - } else { // first retrieve from vnode during the secondary stage sub-query - if (pParentSql->fp == NULL) { - tsem_wait(&pParentSql->emptyRspSem); - tsem_wait(&pParentSql->emptyRspSem); - - tsem_post(&pParentSql->rspSem); - } else { - // set the command flag must be after the semaphore been correctly set. 
- // pPObj->cmd.command = TSDB_SQL_RETRIEVE_METRIC; - // if (pPObj->res.code == TSDB_CODE_SUCCESS) { - // (*pPObj->fp)(pPObj->param, pPObj, 0); - // } else { - // tscQueueAsyncRes(pPObj); - // } - assert(0); - } - } - } - } - } -} - -static int32_t getDataStartOffset() { - return sizeof(STSBufFileHeader) + TS_COMP_FILE_VNODE_MAX * sizeof(STSVnodeBlockInfo); -} - -static int32_t doUpdateVnodeInfo(STSBuf* pTSBuf, int64_t offset, STSVnodeBlockInfo* pVInfo) { - if (offset < 0 || offset >= getDataStartOffset()) { - return -1; - } - - if (fseek(pTSBuf->f, offset, SEEK_SET) != 0) { - return -1; - } - - fwrite(pVInfo, sizeof(STSVnodeBlockInfo), 1, pTSBuf->f); - return 0; -} - -// update prev vnode length info in file -static void TSBufUpdateVnodeInfo(STSBuf* pTSBuf, int32_t index, STSVnodeBlockInfo* pBlockInfo) { - int32_t offset = sizeof(STSBufFileHeader) + index * sizeof(STSVnodeBlockInfo); - doUpdateVnodeInfo(pTSBuf, offset, pBlockInfo); -} - -static STSBuf* allocResForTSBuf(STSBuf* pTSBuf) { - const int32_t INITIAL_VNODEINFO_SIZE = 4; - - pTSBuf->numOfAlloc = INITIAL_VNODEINFO_SIZE; - pTSBuf->pData = calloc(pTSBuf->numOfAlloc, sizeof(STSVnodeBlockInfoEx)); - if (pTSBuf->pData == NULL) { - tsBufDestory(pTSBuf); - return NULL; - } - - pTSBuf->tsData.rawBuf = malloc(MEM_BUF_SIZE); - if (pTSBuf->tsData.rawBuf == NULL) { - tsBufDestory(pTSBuf); - return NULL; - } - - pTSBuf->bufSize = MEM_BUF_SIZE; - pTSBuf->tsData.threshold = MEM_BUF_SIZE; - pTSBuf->tsData.allocSize = MEM_BUF_SIZE; - - pTSBuf->assistBuf = malloc(MEM_BUF_SIZE); - if (pTSBuf->assistBuf == NULL) { - tsBufDestory(pTSBuf); - return NULL; - } - - pTSBuf->block.payload = malloc(MEM_BUF_SIZE); - if (pTSBuf->block.payload == NULL) { - tsBufDestory(pTSBuf); - return NULL; - } - - pTSBuf->fileSize += getDataStartOffset(); - return pTSBuf; -} - -static int32_t STSBufUpdateHeader(STSBuf* pTSBuf, STSBufFileHeader* pHeader); - -/** - * todo error handling - * support auto closeable tmp file - * @param path - * @return - 
*/ -STSBuf* tsBufCreate(bool autoDelete) { - STSBuf* pTSBuf = calloc(1, sizeof(STSBuf)); - if (pTSBuf == NULL) { - return NULL; - } - - getTmpfilePath("join", pTSBuf->path); - pTSBuf->f = fopen(pTSBuf->path, "w+"); - if (pTSBuf->f == NULL) { - free(pTSBuf); - return NULL; - } - - if (NULL == allocResForTSBuf(pTSBuf)) { - return NULL; - } - - // update the header info - STSBufFileHeader header = {.magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = TSQL_SO_ASC}; - STSBufUpdateHeader(pTSBuf, &header); - - tsBufResetPos(pTSBuf); - pTSBuf->cur.order = TSQL_SO_ASC; - - pTSBuf->autoDelete = autoDelete; - pTSBuf->tsOrder = -1; - - return pTSBuf; -} - -STSBuf* tsBufCreateFromFile(const char* path, bool autoDelete) { - STSBuf* pTSBuf = calloc(1, sizeof(STSBuf)); - if (pTSBuf == NULL) { - return NULL; - } - - strncpy(pTSBuf->path, path, PATH_MAX); - - pTSBuf->f = fopen(pTSBuf->path, "r+"); - if (pTSBuf->f == NULL) { - free(pTSBuf); - return NULL; - } - - if (allocResForTSBuf(pTSBuf) == NULL) { - return NULL; - } - - // validate the file magic number - STSBufFileHeader header = {0}; - fseek(pTSBuf->f, 0, SEEK_SET); - fread(&header, 1, sizeof(header), pTSBuf->f); - - // invalid file - if (header.magic != TS_COMP_FILE_MAGIC) { - return NULL; - } - - if (header.numOfVnode > pTSBuf->numOfAlloc) { - pTSBuf->numOfAlloc = header.numOfVnode; - STSVnodeBlockInfoEx* tmp = realloc(pTSBuf->pData, sizeof(STSVnodeBlockInfoEx) * pTSBuf->numOfAlloc); - if (tmp == NULL) { - tsBufDestory(pTSBuf); - return NULL; - } - - pTSBuf->pData = tmp; - } - - pTSBuf->numOfVnodes = header.numOfVnode; - - // check the ts order - pTSBuf->tsOrder = header.tsOrder; - if (pTSBuf->tsOrder != TSQL_SO_ASC && pTSBuf->tsOrder != TSQL_SO_DESC) { - tscError("invalid order info in buf:%d", pTSBuf->tsOrder); - tsBufDestory(pTSBuf); - return NULL; - } - - size_t infoSize = sizeof(STSVnodeBlockInfo) * pTSBuf->numOfVnodes; - - STSVnodeBlockInfo* buf = (STSVnodeBlockInfo*)calloc(1, infoSize); - - 
//int64_t pos = ftell(pTSBuf->f); //pos not used - fread(buf, infoSize, 1, pTSBuf->f); - - // the length value for each vnode is not kept in file, so does not set the length value - for (int32_t i = 0; i < pTSBuf->numOfVnodes; ++i) { - STSVnodeBlockInfoEx* pBlockList = &pTSBuf->pData[i]; - memcpy(&pBlockList->info, &buf[i], sizeof(STSVnodeBlockInfo)); - } - - free(buf); - - fseek(pTSBuf->f, 0, SEEK_END); - - struct stat fileStat; - fstat(fileno(pTSBuf->f), &fileStat); - - pTSBuf->fileSize = (uint32_t)fileStat.st_size; - tsBufResetPos(pTSBuf); - - // ascending by default - pTSBuf->cur.order = TSQL_SO_ASC; - - pTSBuf->autoDelete = autoDelete; - - tscTrace("create tsBuf from file:%s, fd:%d, size:%d, numOfVnode:%d, autoDelete:%d", pTSBuf->path, fileno(pTSBuf->f), - pTSBuf->fileSize, pTSBuf->numOfVnodes, pTSBuf->autoDelete); - - return pTSBuf; -} - -void* tsBufDestory(STSBuf* pTSBuf) { - if (pTSBuf == NULL) { - return NULL; - } - - tfree(pTSBuf->assistBuf); - tfree(pTSBuf->tsData.rawBuf); - - tfree(pTSBuf->pData); - tfree(pTSBuf->block.payload); - - fclose(pTSBuf->f); - - if (pTSBuf->autoDelete) { - tscTrace("tsBuf %p destroyed, delete tmp file:%s", pTSBuf, pTSBuf->path); - unlink(pTSBuf->path); - } else { - tscTrace("tsBuf %p destroyed, tmp file:%s, remains", pTSBuf, pTSBuf->path); - } - - free(pTSBuf); - return NULL; -} - -static STSVnodeBlockInfoEx* tsBufGetLastVnodeInfo(STSBuf* pTSBuf) { - int32_t last = pTSBuf->numOfVnodes - 1; - - assert(last >= 0); - return &pTSBuf->pData[last]; -} - -static STSVnodeBlockInfoEx* addOneVnodeInfo(STSBuf* pTSBuf, int32_t vnodeId) { - if (pTSBuf->numOfAlloc <= pTSBuf->numOfVnodes) { - uint32_t newSize = (uint32_t)(pTSBuf->numOfAlloc * 1.5); - assert(newSize > pTSBuf->numOfAlloc); - - STSVnodeBlockInfoEx* tmp = (STSVnodeBlockInfoEx*)realloc(pTSBuf->pData, sizeof(STSVnodeBlockInfoEx) * newSize); - if (tmp == NULL) { - return NULL; - } - - pTSBuf->pData = tmp; - pTSBuf->numOfAlloc = newSize; - memset(&pTSBuf->pData[pTSBuf->numOfVnodes], 
0, sizeof(STSVnodeBlockInfoEx) * (newSize - pTSBuf->numOfVnodes)); - } - - if (pTSBuf->numOfVnodes > 0) { - STSVnodeBlockInfoEx* pPrevBlockInfoEx = tsBufGetLastVnodeInfo(pTSBuf); - - // update prev vnode length info in file - TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes - 1, &pPrevBlockInfoEx->info); - } - - // set initial value for vnode block - STSVnodeBlockInfo* pBlockInfo = &pTSBuf->pData[pTSBuf->numOfVnodes].info; - pBlockInfo->vnode = vnodeId; - pBlockInfo->offset = pTSBuf->fileSize; - assert(pBlockInfo->offset >= getDataStartOffset()); - - // update vnode info in file - TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes, pBlockInfo); - - // add one vnode info - pTSBuf->numOfVnodes += 1; - - // update the header info - STSBufFileHeader header = { - .magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = pTSBuf->tsOrder}; - - STSBufUpdateHeader(pTSBuf, &header); - return tsBufGetLastVnodeInfo(pTSBuf); -} - -static void shrinkBuffer(STSList* ptsData) { - // shrink tmp buffer size if it consumes too many memory compared to the pre-defined size - if (ptsData->allocSize >= ptsData->threshold * 2) { - ptsData->rawBuf = realloc(ptsData->rawBuf, MEM_BUF_SIZE); - ptsData->allocSize = MEM_BUF_SIZE; - } -} - -static void writeDataToDisk(STSBuf* pTSBuf) { - if (pTSBuf->tsData.len == 0) { - return; - } - - STSBlock* pBlock = &pTSBuf->block; - - pBlock->numOfElem = pTSBuf->tsData.len / TSDB_KEYSIZE; - pBlock->compLen = - tsCompressTimestamp(pTSBuf->tsData.rawBuf, pTSBuf->tsData.len, pTSBuf->tsData.len / TSDB_KEYSIZE, pBlock->payload, - pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); - - int64_t r = fseek(pTSBuf->f, pTSBuf->fileSize, SEEK_SET); - UNUSED(r); - - /* - * format for output data: - * 1. tags, number of ts, size after compressed, payload, size after compressed - * 2. 
tags, number of ts, size after compressed, payload, size after compressed - * - * both side has the compressed length is used to support load data forwards/backwords. - */ - fwrite(&pBlock->tag, sizeof(pBlock->tag), 1, pTSBuf->f); - fwrite(&pBlock->numOfElem, sizeof(pBlock->numOfElem), 1, pTSBuf->f); - - fwrite(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); - - fwrite(pBlock->payload, (size_t)pBlock->compLen, 1, pTSBuf->f); - - fwrite(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); - - int32_t blockSize = sizeof(pBlock->tag) + sizeof(pBlock->numOfElem) + sizeof(pBlock->compLen) * 2 + pBlock->compLen; - pTSBuf->fileSize += blockSize; - - pTSBuf->tsData.len = 0; - - STSVnodeBlockInfoEx* pVnodeBlockInfoEx = tsBufGetLastVnodeInfo(pTSBuf); - - pVnodeBlockInfoEx->info.compLen += blockSize; - pVnodeBlockInfoEx->info.numOfBlocks += 1; - - shrinkBuffer(&pTSBuf->tsData); -} - -static void expandBuffer(STSList* ptsData, int32_t inputSize) { - if (ptsData->allocSize - ptsData->len < inputSize) { - int32_t newSize = inputSize + ptsData->len; - char* tmp = realloc(ptsData->rawBuf, (size_t)newSize); - if (tmp == NULL) { - // todo - } - - ptsData->rawBuf = tmp; - ptsData->allocSize = newSize; - } -} - -STSBlock* readDataFromDisk(STSBuf* pTSBuf, int32_t order, bool decomp) { - STSBlock* pBlock = &pTSBuf->block; - - // clear the memory buffer - void* tmp = pBlock->payload; - memset(pBlock, 0, sizeof(STSBlock)); - pBlock->payload = tmp; - - if (order == TSQL_SO_DESC) { - /* - * set the right position for the reversed traverse, the reversed traverse is started from - * the end of each comp data block - */ - fseek(pTSBuf->f, -sizeof(pBlock->padding), SEEK_CUR); - fread(&pBlock->padding, sizeof(pBlock->padding), 1, pTSBuf->f); - - pBlock->compLen = pBlock->padding; - int32_t offset = pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + sizeof(pBlock->tag); - fseek(pTSBuf->f, -offset, SEEK_CUR); - } - - fread(&pBlock->tag, sizeof(pBlock->tag), 
1, pTSBuf->f); - fread(&pBlock->numOfElem, sizeof(pBlock->numOfElem), 1, pTSBuf->f); - - fread(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); - fread(pBlock->payload, (size_t)pBlock->compLen, 1, pTSBuf->f); - - if (decomp) { - pTSBuf->tsData.len = - tsDecompressTimestamp(pBlock->payload, pBlock->compLen, pBlock->numOfElem, pTSBuf->tsData.rawBuf, - pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); - } - - // read the comp length at the length of comp block - fread(&pBlock->padding, sizeof(pBlock->padding), 1, pTSBuf->f); - - // for backwards traverse, set the start position at the end of previous block - if (order == TSQL_SO_DESC) { - int32_t offset = pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + sizeof(pBlock->tag); - int64_t r = fseek(pTSBuf->f, -offset, SEEK_CUR); - UNUSED(r); - } - - return pBlock; -} - -// set the order of ts buffer if the ts order has not been set yet -static int32_t setCheckTSOrder(STSBuf* pTSBuf, const char* pData, int32_t len) { - STSList* ptsData = &pTSBuf->tsData; - - if (pTSBuf->tsOrder == -1) { - if (ptsData->len > 0) { - TSKEY lastKey = *(TSKEY*)(ptsData->rawBuf + ptsData->len - TSDB_KEYSIZE); - - if (lastKey > *(TSKEY*)pData) { - pTSBuf->tsOrder = TSQL_SO_DESC; - } else { - pTSBuf->tsOrder = TSQL_SO_ASC; - } - } else if (len > TSDB_KEYSIZE) { - // no data in current vnode, more than one ts is added, check the orders - TSKEY k1 = *(TSKEY*)(pData); - TSKEY k2 = *(TSKEY*)(pData + TSDB_KEYSIZE); - - if (k1 < k2) { - pTSBuf->tsOrder = TSQL_SO_ASC; - } else if (k1 > k2) { - pTSBuf->tsOrder = TSQL_SO_DESC; - } else { - // todo handle error - } - } - } else { - // todo the timestamp order is set, check the asc/desc order of appended data - } - - return TSDB_CODE_SUCCESS; -} - -void tsBufAppend(STSBuf* pTSBuf, int32_t vnodeId, int64_t tag, const char* pData, int32_t len) { - STSVnodeBlockInfoEx* pBlockInfo = NULL; - STSList* ptsData = &pTSBuf->tsData; - - if 
(pTSBuf->numOfVnodes == 0 || tsBufGetLastVnodeInfo(pTSBuf)->info.vnode != vnodeId) { - writeDataToDisk(pTSBuf); - shrinkBuffer(ptsData); - - pBlockInfo = addOneVnodeInfo(pTSBuf, vnodeId); - } else { - pBlockInfo = tsBufGetLastVnodeInfo(pTSBuf); - } - - assert(pBlockInfo->info.vnode == vnodeId); - - if (pTSBuf->block.tag != tag && ptsData->len > 0) { - // new arrived data with different tags value, save current value into disk first - writeDataToDisk(pTSBuf); - } else { - expandBuffer(ptsData, len); - } - - pTSBuf->block.tag = tag; - memcpy(ptsData->rawBuf + ptsData->len, pData, (size_t)len); - - // todo check return value - setCheckTSOrder(pTSBuf, pData, len); - - ptsData->len += len; - pBlockInfo->len += len; - - pTSBuf->numOfTotal += len / TSDB_KEYSIZE; - - // the size of raw data exceeds the size of the default prepared buffer, so - // during getBufBlock, the output buffer needs to be large enough. - if (ptsData->len >= ptsData->threshold) { - writeDataToDisk(pTSBuf); - shrinkBuffer(ptsData); - } - - tsBufResetPos(pTSBuf); -} - -void tsBufFlush(STSBuf* pTSBuf) { - if (pTSBuf->tsData.len <= 0) { - return; - } - - writeDataToDisk(pTSBuf); - shrinkBuffer(&pTSBuf->tsData); - - STSVnodeBlockInfoEx* pBlockInfoEx = tsBufGetLastVnodeInfo(pTSBuf); - - // update prev vnode length info in file - TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes - 1, &pBlockInfoEx->info); - - // save the ts order into header - STSBufFileHeader header = { - .magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = pTSBuf->tsOrder}; - STSBufUpdateHeader(pTSBuf, &header); - - fsync(fileno(pTSBuf->f)); -} - -static int32_t tsBufFindVnodeIndexFromId(STSVnodeBlockInfoEx* pVnodeInfoEx, int32_t numOfVnodes, int32_t vnodeId) { - int32_t j = -1; - for (int32_t i = 0; i < numOfVnodes; ++i) { - if (pVnodeInfoEx[i].info.vnode == vnodeId) { - j = i; - break; - } - } - - return j; -} - -// todo opt performance by cache blocks info -static int32_t tsBufFindBlock(STSBuf* pTSBuf, 
STSVnodeBlockInfo* pBlockInfo, int32_t blockIndex) { - if (fseek(pTSBuf->f, pBlockInfo->offset, SEEK_SET) != 0) { - return -1; - } - - // sequentially read the compressed data blocks, start from the beginning of the comp data block of this vnode - int32_t i = 0; - bool decomp = false; - - while ((i++) <= blockIndex) { - if (readDataFromDisk(pTSBuf, TSQL_SO_ASC, decomp) == NULL) { - return -1; - } - } - - // set the file position to be the end of previous comp block - if (pTSBuf->cur.order == TSQL_SO_DESC) { - STSBlock* pBlock = &pTSBuf->block; - int32_t compBlockSize = - pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + sizeof(pBlock->tag); - fseek(pTSBuf->f, -compBlockSize, SEEK_CUR); - } - - return 0; -} - -static int32_t tsBufFindBlockByTag(STSBuf* pTSBuf, STSVnodeBlockInfo* pBlockInfo, int64_t tag) { - bool decomp = false; - - int64_t offset = 0; - if (pTSBuf->cur.order == TSQL_SO_ASC) { - offset = pBlockInfo->offset; - } else { // reversed traverse starts from the end of block - offset = pBlockInfo->offset + pBlockInfo->compLen; - } - - if (fseek(pTSBuf->f, offset, SEEK_SET) != 0) { - return -1; - } - - for (int32_t i = 0; i < pBlockInfo->numOfBlocks; ++i) { - if (readDataFromDisk(pTSBuf, pTSBuf->cur.order, decomp) == NULL) { - return -1; - } - - if (pTSBuf->block.tag == tag) { - return i; - } - } - - return -1; -} - -static void tsBufGetBlock(STSBuf* pTSBuf, int32_t vnodeIndex, int32_t blockIndex) { - STSVnodeBlockInfo* pBlockInfo = &pTSBuf->pData[vnodeIndex].info; - if (pBlockInfo->numOfBlocks <= blockIndex) { - assert(false); - } - - STSCursor* pCur = &pTSBuf->cur; - if (pCur->vnodeIndex == vnodeIndex && ((pCur->blockIndex <= blockIndex && pCur->order == TSQL_SO_ASC) || - (pCur->blockIndex >= blockIndex && pCur->order == TSQL_SO_DESC))) { - int32_t i = 0; - bool decomp = false; - int32_t step = abs(blockIndex - pCur->blockIndex); - - while ((++i) <= step) { - if (readDataFromDisk(pTSBuf, pCur->order, decomp) == NULL) { - return; - 
} - } - } else { - if (tsBufFindBlock(pTSBuf, pBlockInfo, blockIndex) == -1) { - assert(false); - } - } - - STSBlock* pBlock = &pTSBuf->block; - - size_t s = pBlock->numOfElem * TSDB_KEYSIZE; - - /* - * In order to accommodate all the qualified data, the actual buffer size for one block with identical tags value - * may exceed the maximum allowed size during *tsBufAppend* function by invoking expandBuffer function - */ - if (s > pTSBuf->tsData.allocSize) { - expandBuffer(&pTSBuf->tsData, s); - } - - pTSBuf->tsData.len = - tsDecompressTimestamp(pBlock->payload, pBlock->compLen, pBlock->numOfElem, pTSBuf->tsData.rawBuf, - pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); - - assert((pTSBuf->tsData.len / TSDB_KEYSIZE == pBlock->numOfElem) && (pTSBuf->tsData.allocSize >= pTSBuf->tsData.len)); - - pCur->vnodeIndex = vnodeIndex; - pCur->blockIndex = blockIndex; - - pCur->tsIndex = (pCur->order == TSQL_SO_ASC) ? 0 : pBlock->numOfElem - 1; -} - -STSVnodeBlockInfo* tsBufGetVnodeBlockInfo(STSBuf* pTSBuf, int32_t vnodeId) { - int32_t j = tsBufFindVnodeIndexFromId(pTSBuf->pData, pTSBuf->numOfVnodes, vnodeId); - if (j == -1) { - return NULL; - } - - return &pTSBuf->pData[j].info; -} - -int32_t STSBufUpdateHeader(STSBuf* pTSBuf, STSBufFileHeader* pHeader) { - if ((pTSBuf->f == NULL) || pHeader == NULL || pHeader->numOfVnode < 0 || pHeader->magic != TS_COMP_FILE_MAGIC) { - return -1; - } - - int64_t r = fseek(pTSBuf->f, 0, SEEK_SET); - if (r != 0) { - return -1; - } - - fwrite(pHeader, sizeof(STSBufFileHeader), 1, pTSBuf->f); - return 0; -} - -bool tsBufNextPos(STSBuf* pTSBuf) { - if (pTSBuf == NULL || pTSBuf->numOfVnodes == 0) { - return false; - } - - STSCursor* pCur = &pTSBuf->cur; - - // get the first/last position according to traverse order - if (pCur->vnodeIndex == -1) { - if (pCur->order == TSQL_SO_ASC) { - tsBufGetBlock(pTSBuf, 0, 0); - - if (pTSBuf->block.numOfElem == 0) { // the whole list is empty, return - tsBufResetPos(pTSBuf); - return 
false; - } else { - return true; - } - - } else { // get the last timestamp record in the last block of the last vnode - assert(pTSBuf->numOfVnodes > 0); - - int32_t vnodeIndex = pTSBuf->numOfVnodes - 1; - pCur->vnodeIndex = vnodeIndex; - - int32_t vnodeId = pTSBuf->pData[pCur->vnodeIndex].info.vnode; - STSVnodeBlockInfo* pBlockInfo = tsBufGetVnodeBlockInfo(pTSBuf, vnodeId); - int32_t blockIndex = pBlockInfo->numOfBlocks - 1; - - tsBufGetBlock(pTSBuf, vnodeIndex, blockIndex); - - pCur->tsIndex = pTSBuf->block.numOfElem - 1; - if (pTSBuf->block.numOfElem == 0) { - tsBufResetPos(pTSBuf); - return false; - } else { - return true; - } - } - } - - int32_t step = pCur->order == TSQL_SO_ASC ? 1 : -1; - - while (1) { - assert(pTSBuf->tsData.len == pTSBuf->block.numOfElem * TSDB_KEYSIZE); - - if ((pCur->order == TSQL_SO_ASC && pCur->tsIndex >= pTSBuf->block.numOfElem - 1) || - (pCur->order == TSQL_SO_DESC && pCur->tsIndex <= 0)) { - int32_t vnodeId = pTSBuf->pData[pCur->vnodeIndex].info.vnode; - - STSVnodeBlockInfo* pBlockInfo = tsBufGetVnodeBlockInfo(pTSBuf, vnodeId); - if (pBlockInfo == NULL || (pCur->blockIndex >= pBlockInfo->numOfBlocks - 1 && pCur->order == TSQL_SO_ASC) || - (pCur->blockIndex <= 0 && pCur->order == TSQL_SO_DESC)) { - if ((pCur->vnodeIndex >= pTSBuf->numOfVnodes - 1 && pCur->order == TSQL_SO_ASC) || - (pCur->vnodeIndex <= 0 && pCur->order == TSQL_SO_DESC)) { - pCur->vnodeIndex = -1; - return false; - } - - if (pBlockInfo == NULL) { - return false; - } - - int32_t blockIndex = pCur->order == TSQL_SO_ASC ? 
0 : pBlockInfo->numOfBlocks - 1; - tsBufGetBlock(pTSBuf, pCur->vnodeIndex + step, blockIndex); - break; - - } else { - tsBufGetBlock(pTSBuf, pCur->vnodeIndex, pCur->blockIndex + step); - break; - } - } else { - pCur->tsIndex += step; - break; - } - } - - return true; -} - -void tsBufResetPos(STSBuf* pTSBuf) { - if (pTSBuf == NULL) { - return; - } - - pTSBuf->cur = (STSCursor){.tsIndex = -1, .blockIndex = -1, .vnodeIndex = -1, .order = pTSBuf->cur.order}; -} - -STSElem tsBufGetElem(STSBuf* pTSBuf) { - STSElem elem1 = {.vnode = -1}; - STSCursor* pCur = &pTSBuf->cur; - - if (pTSBuf == NULL || pCur->vnodeIndex < 0) { - return elem1; - } - - STSBlock* pBlock = &pTSBuf->block; - - elem1.vnode = pTSBuf->pData[pCur->vnodeIndex].info.vnode; - elem1.ts = *(TSKEY*)(pTSBuf->tsData.rawBuf + pCur->tsIndex * TSDB_KEYSIZE); - elem1.tag = pBlock->tag; - - return elem1; -} - -/** - * current only support ts comp data from two vnode merge - * @param pDestBuf - * @param pSrcBuf - * @param vnodeId - * @return - */ -int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf, int32_t vnodeId) { - if (pDestBuf == NULL || pSrcBuf == NULL || pSrcBuf->numOfVnodes <= 0) { - return 0; - } - - if (pDestBuf->numOfVnodes + pSrcBuf->numOfVnodes > TS_COMP_FILE_VNODE_MAX) { - return -1; - } - - // src can only have one vnode index - if (pSrcBuf->numOfVnodes > 1) { - return -1; - } - - // there are data in buffer, flush to disk first - tsBufFlush(pDestBuf); - - // compared with the last vnode id - if (vnodeId != tsBufGetLastVnodeInfo(pDestBuf)->info.vnode) { - int32_t oldSize = pDestBuf->numOfVnodes; - int32_t newSize = oldSize + pSrcBuf->numOfVnodes; - - if (pDestBuf->numOfAlloc < newSize) { - pDestBuf->numOfAlloc = newSize; - - STSVnodeBlockInfoEx* tmp = realloc(pDestBuf->pData, sizeof(STSVnodeBlockInfoEx) * newSize); - if (tmp == NULL) { - return -1; - } - - pDestBuf->pData = tmp; - } - - // directly copy the vnode index information - memcpy(&pDestBuf->pData[oldSize], pSrcBuf->pData, 
(size_t)pSrcBuf->numOfVnodes * sizeof(STSVnodeBlockInfoEx)); - - // set the new offset value - for (int32_t i = 0; i < pSrcBuf->numOfVnodes; ++i) { - STSVnodeBlockInfoEx* pBlockInfoEx = &pDestBuf->pData[i + oldSize]; - pBlockInfoEx->info.offset = (pSrcBuf->pData[i].info.offset - getDataStartOffset()) + pDestBuf->fileSize; - pBlockInfoEx->info.vnode = vnodeId; - } - - pDestBuf->numOfVnodes = newSize; - } else { - STSVnodeBlockInfoEx* pBlockInfoEx = tsBufGetLastVnodeInfo(pDestBuf); - - pBlockInfoEx->len += pSrcBuf->pData[0].len; - pBlockInfoEx->info.numOfBlocks += pSrcBuf->pData[0].info.numOfBlocks; - pBlockInfoEx->info.compLen += pSrcBuf->pData[0].info.compLen; - pBlockInfoEx->info.vnode = vnodeId; - } - - int32_t r = fseek(pDestBuf->f, 0, SEEK_END); - assert(r == 0); - - int64_t offset = getDataStartOffset(); - int32_t size = pSrcBuf->fileSize - offset; - -#ifdef LINUX - ssize_t rc = tsendfile(fileno(pDestBuf->f), fileno(pSrcBuf->f), &offset, size); -#else - ssize_t rc = fsendfile(pDestBuf->f, pSrcBuf->f, &offset, size); -#endif - - if (rc == -1) { - tscError("failed to merge tsBuf from:%s to %s, reason:%s\n", pSrcBuf->path, pDestBuf->path, strerror(errno)); - return -1; - } - - if (rc != size) { - tscError("failed to merge tsBuf from:%s to %s, reason:%s\n", pSrcBuf->path, pDestBuf->path, strerror(errno)); - return -1; - } - - pDestBuf->numOfTotal += pSrcBuf->numOfTotal; - - int32_t oldSize = pDestBuf->fileSize; - - struct stat fileStat; - fstat(fileno(pDestBuf->f), &fileStat); - pDestBuf->fileSize = (uint32_t)fileStat.st_size; - - assert(pDestBuf->fileSize == oldSize + size); - - tscTrace("tsBuf merge success, %p, path:%s, fd:%d, file size:%d, numOfVnode:%d, autoDelete:%d", pDestBuf, - pDestBuf->path, fileno(pDestBuf->f), pDestBuf->fileSize, pDestBuf->numOfVnodes, pDestBuf->autoDelete); - - return 0; -} - -STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t order) { - STSBuf* pTSBuf = tsBufCreate(true); - - 
STSVnodeBlockInfo* pBlockInfo = &(addOneVnodeInfo(pTSBuf, 0)->info); - pBlockInfo->numOfBlocks = numOfBlocks; - pBlockInfo->compLen = len; - pBlockInfo->offset = getDataStartOffset(); - pBlockInfo->vnode = 0; - - // update prev vnode length info in file - TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes - 1, pBlockInfo); - - fseek(pTSBuf->f, pBlockInfo->offset, SEEK_SET); - fwrite((void*)pData, 1, len, pTSBuf->f); - pTSBuf->fileSize += len; - - pTSBuf->tsOrder = order; - assert(order == TSQL_SO_ASC || order == TSQL_SO_DESC); - - STSBufFileHeader header = { - .magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = pTSBuf->tsOrder}; - STSBufUpdateHeader(pTSBuf, &header); - - fsync(fileno(pTSBuf->f)); - - return pTSBuf; -} - -STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t vnodeId, int64_t tag) { - STSElem elem = {.vnode = -1}; - - if (pTSBuf == NULL) { - return elem; - } - - int32_t j = tsBufFindVnodeIndexFromId(pTSBuf->pData, pTSBuf->numOfVnodes, vnodeId); - if (j == -1) { - return elem; - } - - // for debug purpose - // tsBufDisplay(pTSBuf); - - STSCursor* pCur = &pTSBuf->cur; - STSVnodeBlockInfo* pBlockInfo = &pTSBuf->pData[j].info; - - int32_t blockIndex = tsBufFindBlockByTag(pTSBuf, pBlockInfo, tag); - if (blockIndex < 0) { - return elem; - } - - pCur->vnodeIndex = j; - pCur->blockIndex = blockIndex; - tsBufGetBlock(pTSBuf, j, blockIndex); - - return tsBufGetElem(pTSBuf); -} - -STSCursor tsBufGetCursor(STSBuf* pTSBuf) { - STSCursor c = {.vnodeIndex = -1}; - if (pTSBuf == NULL) { - return c; - } - - return pTSBuf->cur; -} - -void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur) { - if (pTSBuf == NULL || pCur == NULL) { - return; - } - - // assert(pCur->vnodeIndex != -1 && pCur->tsIndex >= 0 && pCur->blockIndex >= 0); - if (pCur->vnodeIndex != -1) { - tsBufGetBlock(pTSBuf, pCur->vnodeIndex, pCur->blockIndex); - } - - pTSBuf->cur = *pCur; -} - -void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order) { - if (pTSBuf == NULL) { - return; - } - 
- pTSBuf->cur.order = order; -} - -STSBuf* tsBufClone(STSBuf* pTSBuf) { - if (pTSBuf == NULL) { - return NULL; - } - - return tsBufCreateFromFile(pTSBuf->path, false); -} - -void tsBufDisplay(STSBuf* pTSBuf) { - printf("-------start of ts comp file-------\n"); - printf("number of vnode:%d\n", pTSBuf->numOfVnodes); - - int32_t old = pTSBuf->cur.order; - pTSBuf->cur.order = TSQL_SO_ASC; - - tsBufResetPos(pTSBuf); - - while (tsBufNextPos(pTSBuf)) { - STSElem elem = tsBufGetElem(pTSBuf); - printf("%d-%" PRId64 "-%" PRId64 "\n", elem.vnode, *(int64_t*) elem.tag, elem.ts); - } - - pTSBuf->cur.order = old; - printf("-------end of ts comp file-------\n"); -} diff --git a/src/client/src/tscLocal.c b/src/client/src/tscLocal.c index 89b43816c86db232b2d48de86a839bc958296b38..46f3ab6687d8fed3110460a5e66f30d0843e8297 100644 --- a/src/client/src/tscLocal.c +++ b/src/client/src/tscLocal.c @@ -21,10 +21,10 @@ #include "tsclient.h" #include "taosdef.h" -#include "textbuffer.h" +#include "qextbuffer.h" #include "tscSecondaryMerge.h" #include "tschemautil.h" -#include "tsocket.h" +#include "name.h" static void tscSetLocalQueryResult(SSqlObj *pSql, const char *val, const char *columnName, size_t valueLength); @@ -77,20 +77,22 @@ static int32_t getToStringLength(const char *pData, int32_t length, int32_t type * length((uint64_t) 123456789011) > 12, greater than sizsof(uint64_t) */ static int32_t tscMaxLengthOfTagsFields(SSqlObj *pSql) { - STableMeta *pMeta = tscGetMeterMetaInfo(&pSql->cmd, 0, 0)->pMeterMeta; + STableMeta *pMeta = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0)->pTableMeta; - if (pMeta->tableType == TSDB_TABLE_TYPE_SUPER_TABLE || pMeta->tableType == TSDB_TABLE_TYPE_NORMAL_TABLE || - pMeta->tableType == TSDB_TABLE_TYPE_STREAM_TABLE) { + if (pMeta->tableType == TSDB_SUPER_TABLE || pMeta->tableType == TSDB_NORMAL_TABLE || + pMeta->tableType == TSDB_STREAM_TABLE) { return 0; } char * pTagValue = tsGetTagsValue(pMeta); - SSchema *pTagsSchema = tsGetTagSchema(pMeta); + SSchema 
*pTagsSchema = tscGetTableTagSchema(pMeta); int32_t len = getToStringLength(pTagValue, pTagsSchema[0].bytes, pTagsSchema[0].type); pTagValue += pTagsSchema[0].bytes; - for (int32_t i = 1; i < pMeta->numOfTags; ++i) { + int32_t numOfTags = tscGetNumOfTags(pMeta); + + for (int32_t i = 1; i < numOfTags; ++i) { int32_t tLen = getToStringLength(pTagValue, pTagsSchema[i].bytes, pTagsSchema[i].type); if (len < tLen) { len = tLen; @@ -108,8 +110,8 @@ static int32_t tscSetValueToResObj(SSqlObj *pSql, int32_t rowLen) { // one column for each row SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - STableMeta * pMeta = pMeterMetaInfo->pMeterMeta; + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + STableMeta * pMeta = pTableMetaInfo->pTableMeta; /* * tagValueCnt is to denote the number of tags columns for meter, not metric. and is to show the column data. @@ -117,15 +119,15 @@ static int32_t tscSetValueToResObj(SSqlObj *pSql, int32_t rowLen) { * for metric, the value of tagValueCnt must be 0, but the numOfTags is not 0 */ - int32_t numOfRows = pMeta->numOfColumns; - int32_t totalNumOfRows = numOfRows + pMeta->numOfTags; + int32_t numOfRows = tscGetNumOfColumns(pMeta); + int32_t totalNumOfRows = numOfRows + tscGetNumOfTags(pMeta); - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { - numOfRows = pMeta->numOfColumns + pMeta->numOfTags; + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { + numOfRows = numOfRows + tscGetNumOfTags(pMeta); } tscInitResObjForLocalQuery(pSql, totalNumOfRows, rowLen); - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = tscGetTableSchema(pMeta); for (int32_t i = 0; i < numOfRows; ++i) { TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, 0); @@ -146,13 +148,13 @@ static int32_t tscSetValueToResObj(SSqlObj *pSql, int32_t rowLen) { *(int32_t *)(pRes->data + tscFieldInfoGetOffset(pQueryInfo, 2) * totalNumOfRows + pField->bytes * i) = 
bytes; pField = tscFieldInfoGetField(pQueryInfo, 3); - if (i >= pMeta->numOfColumns && pMeta->numOfTags != 0) { + if (i >= tscGetNumOfColumns(pMeta) && tscGetNumOfTags(pMeta) != 0) { strncpy(pRes->data + tscFieldInfoGetOffset(pQueryInfo, 3) * totalNumOfRows + pField->bytes * i, "tag", strlen("tag") + 1); } } - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { return 0; } @@ -265,7 +267,7 @@ static int32_t tscBuildMeterSchemaResultFields(SSqlObj *pSql, int32_t numOfCols, static int32_t tscProcessDescribeTable(SSqlObj *pSql) { SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - assert(tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0)->pMeterMeta != NULL); + assert(tscGetMetaInfo(pQueryInfo, 0)->pTableMeta != NULL); const int32_t NUM_OF_DESCRIBE_TABLE_COLUMNS = 4; const int32_t TYPE_COLUMN_LENGTH = 16; @@ -290,15 +292,15 @@ static int tscBuildMetricTagProjectionResult(SSqlObj *pSql) { SSqlRes * pRes = &pSql->res; SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - SSuperTableMeta *pMetricMeta = pMeterMetaInfo->pMetricMeta; - SSchema * pSchema = tsGetTagSchema(pMeterMetaInfo->pMeterMeta); + SSuperTableMeta *pMetricMeta = pTableMetaInfo->pMetricMeta; + SSchema * pSchema = tscGetTableTagSchema(pTableMetaInfo->pTableMeta); int32_t vOffset[TSDB_MAX_COLUMNS] = {0}; - for (int32_t f = 1; f < pMeterMetaInfo->numOfTags; ++f) { - int16_t tagColumnIndex = pMeterMetaInfo->tagColumnIndex[f - 1]; + for (int32_t f = 1; f < pTableMetaInfo->numOfTags; ++f) { + int16_t tagColumnIndex = pTableMetaInfo->tagColumnIndex[f - 1]; if (tagColumnIndex == -1) { vOffset[f] = vOffset[f - 1] + TSDB_TABLE_NAME_LEN; } else { @@ -316,15 +318,16 @@ static int tscBuildMetricTagProjectionResult(SSqlObj *pSql) { SVnodeSidList *pSidList = (SVnodeSidList *)((char *)pMetricMeta + 
pMetricMeta->list[i]); for (int32_t j = 0; j < pSidList->numOfSids; ++j) { - STableSidExtInfo *pSidExt = tscGetMeterSidInfo(pSidList, j); + STableIdInfo *pSidExt = tscGetMeterSidInfo(pSidList, j); for (int32_t k = 0; k < pQueryInfo->fieldsInfo.numOfOutputCols; ++k) { SColIndexEx *pColIndex = &tscSqlExprGet(pQueryInfo, k)->colInfo; int16_t offsetId = pColIndex->colIdx; assert((pColIndex->flag & TSDB_COL_TAG) != 0); - - char * val = pSidExt->tags + vOffset[offsetId]; + assert(0); + + char * val = NULL;//pSidExt->tags + vOffset[offsetId]; TAOS_FIELD *pField = tscFieldInfoGetField(pQueryInfo, k); memcpy(pRes->data + tscFieldInfoGetOffset(pQueryInfo, k) * totalNumOfResults + pField->bytes * rowIdx, val, @@ -343,7 +346,7 @@ static int tscBuildMetricTagSqlFunctionResult(SSqlObj *pSql) { SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SSuperTableMeta *pMetricMeta = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0)->pMetricMeta; + SSuperTableMeta *pMetricMeta = tscGetMetaInfo(pQueryInfo, 0)->pMetricMeta; int32_t totalNumOfResults = 1; // count function only produce one result int32_t rowLen = tscGetResRowLength(pQueryInfo); @@ -375,8 +378,8 @@ static int tscProcessQueryTags(SSqlObj *pSql) { SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - STableMeta *pMeterMeta = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0)->pMeterMeta; - if (pMeterMeta == NULL || pMeterMeta->numOfTags == 0 || pMeterMeta->numOfColumns == 0) { + STableMeta *pTableMeta = tscGetMetaInfo(pQueryInfo, 0)->pTableMeta; + if (pTableMeta == NULL || tscGetNumOfTags(pTableMeta) == 0 || tscGetNumOfColumns(pTableMeta) == 0) { strcpy(pCmd->payload, "invalid table"); pSql->res.code = TSDB_CODE_INVALID_TABLE; return pSql->res.code; @@ -484,7 +487,7 @@ int tscProcessLocalCmd(SSqlObj *pSql) { pSql->res.qhandle = 0x1; pSql->res.numOfRows = 0; } else if (pCmd->command == TSDB_SQL_RESET_CACHE) { - taosClearDataCache(tscCacheHandle); + taosCacheEmpty(tscCacheHandle); } else if (pCmd->command == 
TSDB_SQL_SERV_VERSION) { tscProcessServerVer(pSql); } else if (pCmd->command == TSDB_SQL_CLI_VERSION) { diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index 4c9f99b93235f38cfd995dc822e2ceef2ee1a325..8fb6b925efd01fc1e22be509895762868fe28bc6 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -28,6 +28,7 @@ #include "taosdef.h" #include "tlog.h" +#include "tscSubquery.h" #include "tstoken.h" #include "ttime.h" @@ -496,15 +497,17 @@ static int32_t rowDataCompar(const void *lhs, const void *rhs) { } } -int tsParseValues(char **str, STableDataBlocks *pDataBlock, STableMeta *pMeterMeta, int maxRows, +int tsParseValues(char **str, STableDataBlocks *pDataBlock, STableMeta *pTableMeta, int maxRows, SParsedDataColInfo *spd, char *error, int32_t *code, char *tmpTokenBuf) { int32_t index = 0; SSQLToken sToken; int16_t numOfRows = 0; - SSchema *pSchema = tsGetSchema(pMeterMeta); - int32_t precision = pMeterMeta->precision; + SSchema *pSchema = tscGetTableSchema(pTableMeta); + STableComInfo tinfo = tscGetTableInfo(pTableMeta); + + int32_t precision = tinfo.precision; if (spd->hasVal[0] == false) { strcpy(error, "primary timestamp column can not be null"); @@ -518,9 +521,9 @@ int tsParseValues(char **str, STableDataBlocks *pDataBlock, STableMeta *pMeterMe if (sToken.n == 0 || sToken.type != TK_LP) break; *str += index; - if (numOfRows >= maxRows || pDataBlock->size + pMeterMeta->rowSize >= pDataBlock->nAllocSize) { + if (numOfRows >= maxRows || pDataBlock->size + tinfo.rowSize >= pDataBlock->nAllocSize) { int32_t tSize; - int32_t retcode = tscAllocateMemIfNeed(pDataBlock, pMeterMeta->rowSize, &tSize); + int32_t retcode = tscAllocateMemIfNeed(pDataBlock, tinfo.rowSize, &tSize); if (retcode != TSDB_CODE_SUCCESS) { //TODO pass the correct error code to client strcpy(error, "client out of memory"); *code = retcode; @@ -601,10 +604,10 @@ int32_t tscAllocateMemIfNeed(STableDataBlocks *pDataBlock, int32_t rowSize, int3 
return TSDB_CODE_SUCCESS; } -static void tsSetBlockInfo(SShellSubmitBlock *pBlocks, const STableMeta *pMeterMeta, int32_t numOfRows) { - pBlocks->sid = pMeterMeta->sid; - pBlocks->uid = pMeterMeta->uid; - pBlocks->sversion = pMeterMeta->sversion; +static void tsSetBlockInfo(SShellSubmitBlock *pBlocks, const STableMeta *pTableMeta, int32_t numOfRows) { + pBlocks->sid = pTableMeta->sid; + pBlocks->uid = pTableMeta->uid; + pBlocks->sversion = pTableMeta->sversion; pBlocks->numOfRows += numOfRows; } @@ -654,19 +657,20 @@ void sortRemoveDuplicates(STableDataBlocks *dataBuf) { static int32_t doParseInsertStatement(SSqlObj *pSql, void *pTableHashList, char **str, SParsedDataColInfo *spd, int32_t *totalNum) { SSqlCmd * pCmd = &pSql->cmd; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - STableMeta * pMeterMeta = pMeterMetaInfo->pMeterMeta; - + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableMeta * pTableMeta = pTableMetaInfo->pTableMeta; + STableComInfo tinfo = tscGetTableInfo(pTableMeta); + STableDataBlocks *dataBuf = NULL; - int32_t ret = tscGetDataBlockFromList(pTableHashList, pCmd->pDataBlocks, pMeterMeta->uid, TSDB_DEFAULT_PAYLOAD_SIZE, - sizeof(SShellSubmitBlock), pMeterMeta->rowSize, pMeterMetaInfo->name, - pMeterMeta, &dataBuf); + int32_t ret = tscGetDataBlockFromList(pTableHashList, pCmd->pDataBlocks, pTableMeta->uid, TSDB_DEFAULT_PAYLOAD_SIZE, + sizeof(SShellSubmitBlock), tinfo.rowSize, pTableMetaInfo->name, + pTableMeta, &dataBuf); if (ret != TSDB_CODE_SUCCESS) { return ret; } int32_t maxNumOfRows; - ret = tscAllocateMemIfNeed(dataBuf, pMeterMeta->rowSize, &maxNumOfRows); + ret = tscAllocateMemIfNeed(dataBuf, tinfo.rowSize, &maxNumOfRows); if (TSDB_CODE_SUCCESS != ret) { return TSDB_CODE_CLI_OUT_OF_MEMORY; } @@ -677,7 +681,7 @@ static int32_t doParseInsertStatement(SSqlObj *pSql, void *pTableHashList, char return TSDB_CODE_CLI_OUT_OF_MEMORY; } - int32_t numOfRows = 
tsParseValues(str, dataBuf, pMeterMeta, maxNumOfRows, spd, pCmd->payload, &code, tmpTokenBuf); + int32_t numOfRows = tsParseValues(str, dataBuf, pTableMeta, maxNumOfRows, spd, pCmd->payload, &code, tmpTokenBuf); free(tmpTokenBuf); if (numOfRows <= 0) { return code; @@ -692,9 +696,9 @@ static int32_t doParseInsertStatement(SSqlObj *pSql, void *pTableHashList, char } SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(dataBuf->pData); - tsSetBlockInfo(pBlocks, pMeterMeta, numOfRows); + tsSetBlockInfo(pBlocks, pTableMeta, numOfRows); - dataBuf->vgid = pMeterMeta->vgid; + dataBuf->vgId = pTableMeta->vgId; dataBuf->numOfTables = 1; /* @@ -756,7 +760,7 @@ static int32_t tscCheckIfCreateTable(char **sqlstr, SSqlObj *pSql) { return TSDB_CODE_INVALID_SQL; } - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, TABLE_INDEX); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, TABLE_INDEX); if (sToken.type == TK_USING) { // create table if not exists according to the super table index = 0; @@ -767,34 +771,35 @@ static int32_t tscCheckIfCreateTable(char **sqlstr, SSqlObj *pSql) { memset(pTag, 0, sizeof(STagData)); /* - * the source super table is moved to the secondary position of the pMeterMetaInfo list + * the source super table is moved to the secondary position of the pTableMetaInfo list */ if (pQueryInfo->numOfTables < 2) { - tscAddEmptyMeterMetaInfo(pQueryInfo); + tscAddEmptyMetaInfo(pQueryInfo); } - SMeterMetaInfo *pSTableMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, STABLE_INDEX); + STableMetaInfo *pSTableMeterMetaInfo = tscGetMetaInfo(pQueryInfo, STABLE_INDEX); setMeterID(pSTableMeterMetaInfo, &sToken, pSql); strncpy(pTag->name, pSTableMeterMetaInfo->name, TSDB_TABLE_ID_LEN); - code = tscGetMeterMeta(pSql, pSTableMeterMetaInfo); + code = tscGetTableMeta(pSql, pSTableMeterMetaInfo); if (code != TSDB_CODE_SUCCESS) { return code; } - if (!UTIL_METER_IS_SUPERTABLE(pSTableMeterMetaInfo)) { + if 
(!UTIL_TABLE_IS_SUPERTABLE(pSTableMeterMetaInfo)) { return tscInvalidSQLErrMsg(pCmd->payload, "create table only from super table is allowed", sToken.z); } - SSchema *pTagSchema = tsGetTagSchema(pSTableMeterMetaInfo->pMeterMeta); - + SSchema *pTagSchema = tscGetTableTagSchema(pSTableMeterMetaInfo->pTableMeta); + STableComInfo tinfo = tscGetTableInfo(pSTableMeterMetaInfo->pTableMeta); + index = 0; sToken = tStrGetToken(sql, &index, false, 0, NULL); sql += index; SParsedDataColInfo spd = {0}; - - uint8_t numOfTags = pSTableMeterMetaInfo->pMeterMeta->numOfTags; + + uint8_t numOfTags = tscGetNumOfTags(pSTableMeterMetaInfo->pTableMeta); spd.numOfCols = numOfTags; // if specify some tags column @@ -881,8 +886,7 @@ static int32_t tscCheckIfCreateTable(char **sqlstr, SSqlObj *pSql) { sToken.n -= 2; } - code = tsParseOneColumnData(&pTagSchema[colIndex], &sToken, tagVal, pCmd->payload, &sql, false, - pSTableMeterMetaInfo->pMeterMeta->precision); + code = tsParseOneColumnData(&pTagSchema[colIndex], &sToken, tagVal, pCmd->payload, &sql, false, tinfo.precision); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -917,13 +921,13 @@ static int32_t tscCheckIfCreateTable(char **sqlstr, SSqlObj *pSql) { return tscInvalidSQLErrMsg(pCmd->payload, "invalid table name", *sqlstr); } - int32_t ret = setMeterID(pMeterMetaInfo, &tableToken, pSql); + int32_t ret = setMeterID(pTableMetaInfo, &tableToken, pSql); if (ret != TSDB_CODE_SUCCESS) { return ret; } createTable = true; - code = tscGetMeterMetaEx(pSql, pMeterMetaInfo, true); + code = tscGetMeterMetaEx(pSql, pTableMetaInfo, true); if (TSDB_CODE_ACTION_IN_PROGRESS == code) { return code; } @@ -934,7 +938,7 @@ static int32_t tscCheckIfCreateTable(char **sqlstr, SSqlObj *pSql) { } else { sql = sToken.z; } - code = tscGetMeterMeta(pSql, pMeterMetaInfo); + code = tscGetTableMeta(pSql, pTableMetaInfo); } int32_t len = cend - cstart + 1; @@ -983,15 +987,15 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { int32_t totalNum = 0; int32_t code = 
TSDB_CODE_SUCCESS; - SMeterMetaInfo *pMeterMetaInfo = NULL; + STableMetaInfo *pTableMetaInfo = NULL; SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); assert(pQueryInfo != NULL); if (pQueryInfo->numOfTables == 0) { - pMeterMetaInfo = tscAddEmptyMeterMetaInfo(pQueryInfo); + pTableMetaInfo = tscAddEmptyMetaInfo(pQueryInfo); } else { - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); } if ((code = tscAllocPayload(pCmd, TSDB_PAYLOAD_SIZE)) != TSDB_CODE_SUCCESS) { @@ -1002,7 +1006,7 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { || ((NULL != pSql->asyncTblPos) && (NULL != pSql->pTableHashList))); if ((NULL == pSql->asyncTblPos) && (NULL == pSql->pTableHashList)) { - pSql->pTableHashList = taosInitHashTable(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false); + pSql->pTableHashList = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false); pSql->cmd.pDataBlocks = tscCreateBlockArrayList(); if (NULL == pSql->pTableHashList || NULL == pSql->cmd.pDataBlocks) { @@ -1050,11 +1054,10 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { goto _error_clean; } - if ((code = setMeterID(pMeterMetaInfo, &sToken, pSql)) != TSDB_CODE_SUCCESS) { + if ((code = setMeterID(pTableMetaInfo, &sToken, pSql)) != TSDB_CODE_SUCCESS) { goto _error_clean; } - void *fp = pSql->fp; ptrdiff_t pos = pSql->asyncTblPos - pSql->sqlstr; if ((code = tscCheckIfCreateTable(&str, pSql)) != TSDB_CODE_SUCCESS) { @@ -1064,21 +1067,19 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { * And during the getMeterMetaCallback function, the sql string will be parsed from the * interrupted position. 
*/ - if (fp != NULL) { - if (TSDB_CODE_ACTION_IN_PROGRESS == code) { - tscTrace("async insert and waiting to get meter meta, then continue parse sql from offset: %" PRId64, pos); - return code; - } - - // todo add to return - tscError("async insert parse error, code:%d, %s", code, tstrerror(code)); - pSql->asyncTblPos = NULL; + if (TSDB_CODE_ACTION_IN_PROGRESS == code) { + tscTrace("async insert and waiting to get meter meta, then continue parse sql from offset: %" PRId64, pos); + return code; } + // todo add to return + tscError("async insert parse error, code:%d, %s", code, tstrerror(code)); + pSql->asyncTblPos = NULL; + goto _error_clean; // TODO: should _clean or _error_clean to async flow ???? } - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { code = tscInvalidSQLErrMsg(pCmd->payload, "insert data into super table is not supported", NULL); goto _error_clean; } @@ -1091,12 +1092,14 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { code = tscInvalidSQLErrMsg(pCmd->payload, "keyword VALUES or FILE required", sToken.z); goto _error_clean; } - + + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + if (sToken.type == TK_VALUES) { - SParsedDataColInfo spd = {.numOfCols = pMeterMetaInfo->pMeterMeta->numOfColumns}; - SSchema * pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); - - tscSetAssignedColumnInfo(&spd, pSchema, pMeterMetaInfo->pMeterMeta->numOfColumns); + SParsedDataColInfo spd = {.numOfCols = tinfo.numOfColumns}; + + SSchema *pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); + tscSetAssignedColumnInfo(&spd, pSchema, tinfo.numOfColumns); if (validateDataSource(pCmd, DATA_FROM_SQL_STRING, sToken.z) != TSDB_CODE_SUCCESS) { goto _error_clean; @@ -1136,10 +1139,10 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { wordfree(&full_path); STableDataBlocks *pDataBlock = NULL; - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; - 
int32_t ret = tscCreateDataBlock(PATH_MAX, pMeterMeta->rowSize, sizeof(SShellSubmitBlock), pMeterMetaInfo->name, - pMeterMeta, &pDataBlock); + int32_t ret = tscCreateDataBlock(PATH_MAX, tinfo.rowSize, sizeof(SShellSubmitBlock), pTableMetaInfo->name, + pTableMeta, &pDataBlock); if (ret != TSDB_CODE_SUCCESS) { goto _error_clean; } @@ -1148,18 +1151,18 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { strcpy(pDataBlock->filename, fname); } else if (sToken.type == TK_LP) { /* insert into tablename(col1, col2,..., coln) values(v1, v2,... vn); */ - STableMeta *pMeterMeta = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0)->pMeterMeta; - SSchema * pSchema = tsGetSchema(pMeterMeta); + STableMeta *pTableMeta = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0)->pTableMeta; + SSchema * pSchema = tscGetTableSchema(pTableMeta); if (validateDataSource(pCmd, DATA_FROM_SQL_STRING, sToken.z) != TSDB_CODE_SUCCESS) { goto _error_clean; } SParsedDataColInfo spd = {0}; - spd.numOfCols = pMeterMeta->numOfColumns; + spd.numOfCols = tinfo.numOfColumns; int16_t offset[TSDB_MAX_COLUMNS] = {0}; - for (int32_t t = 1; t < pMeterMeta->numOfColumns; ++t) { + for (int32_t t = 1; t < tinfo.numOfColumns; ++t) { offset[t] = offset[t - 1] + pSchema[t - 1].bytes; } @@ -1181,7 +1184,7 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { bool findColumnIndex = false; // todo speedup by using hash list - for (int32_t t = 0; t < pMeterMeta->numOfColumns; ++t) { + for (int32_t t = 0; t < tinfo.numOfColumns; ++t) { if (strncmp(sToken.z, pSchema[t].name, sToken.n) == 0 && strlen(pSchema[t].name) == sToken.n) { SParsedColElem *pElem = &spd.elems[spd.numOfAssignedCols++]; pElem->offset = offset[t]; @@ -1204,7 +1207,7 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { } } - if (spd.numOfAssignedCols == 0 || spd.numOfAssignedCols > pMeterMeta->numOfColumns) { + if (spd.numOfAssignedCols == 0 || spd.numOfAssignedCols > tinfo.numOfColumns) { code = tscInvalidSQLErrMsg(pCmd->payload, "column name expected", 
sToken.z); goto _error_clean; } @@ -1235,7 +1238,7 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { // submit to more than one vnode if (pCmd->pDataBlocks->nSize > 0) { - // merge according to vgid + // merge according to vgId if ((code = tscMergeTableDataBlocks(pSql, pCmd->pDataBlocks)) != TSDB_CODE_SUCCESS) { goto _error_clean; } @@ -1245,10 +1248,10 @@ int doParseInsertSql(SSqlObj *pSql, char *str) { goto _error_clean; } - pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); + pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); // set the next sent data vnode index in data block arraylist - pMeterMetaInfo->vnodeIndex = 1; + pTableMetaInfo->vnodeIndex = 1; } else { pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); } @@ -1260,7 +1263,7 @@ _error_clean: pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); _clean: - taosCleanUpHashTable(pSql->pTableHashList); + taosHashCleanup(pSql->pTableHashList); pSql->pTableHashList = NULL; pSql->asyncTblPos = NULL; @@ -1314,11 +1317,10 @@ int tsParseSql(SSqlObj *pSql, bool multiVnodeInsertion) { * the error handle callback function can rightfully restore the user defined function (fp) */ if (pSql->fp != NULL && multiVnodeInsertion) { - assert(pSql->fetchFp == NULL); pSql->fetchFp = pSql->fp; // replace user defined callback function with multi-insert proxy function - pSql->fp = tscAsyncInsertMultiVnodesProxy; + pSql->fp = (void(*)())tscHandleMultivnodeInsert; } ret = tsParseInsertSql(pSql); @@ -1334,7 +1336,7 @@ int tsParseSql(SSqlObj *pSql, bool multiVnodeInsertion) { } /* - * the pRes->code may be modified or even released by another thread in tscMeterMetaCallBack + * the pRes->code may be modified or even released by another thread in tscTableMetaCallBack * function, so do NOT use pRes->code to determine if the getMeterMeta/getMetricMeta function * invokes new threads to get data from mnode or simply retrieves data from cache. 
* @@ -1349,10 +1351,10 @@ static int doPackSendDataBlock(SSqlObj *pSql, int32_t numOfRows, STableDataBlock SSqlCmd *pCmd = &pSql->cmd; assert(pCmd->numOfClause == 1); - STableMeta *pMeterMeta = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0)->pMeterMeta; + STableMeta *pTableMeta = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0)->pTableMeta; SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pTableDataBlocks->pData); - tsSetBlockInfo(pBlocks, pMeterMeta, numOfRows); + tsSetBlockInfo(pBlocks, pTableMeta, numOfRows); if ((code = tscMergeTableDataBlocks(pSql, pCmd->pDataBlocks)) != TSDB_CODE_SUCCESS) { return code; @@ -1382,16 +1384,18 @@ static int tscInsertDataFromFile(SSqlObj *pSql, FILE *fp, char *tmpTokenBuf) { int32_t code = 0; int nrows = 0; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - STableMeta * pMeterMeta = pMeterMetaInfo->pMeterMeta; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableMeta * pTableMeta = pTableMetaInfo->pTableMeta; + STableComInfo tinfo = tscGetTableInfo(pTableMeta); + assert(pCmd->numOfClause == 1); - int32_t rowSize = pMeterMeta->rowSize; + int32_t rowSize = tinfo.rowSize; pCmd->pDataBlocks = tscCreateBlockArrayList(); STableDataBlocks *pTableDataBlock = NULL; int32_t ret = tscCreateDataBlock(TSDB_PAYLOAD_SIZE, rowSize, sizeof(SShellSubmitBlock), - pMeterMetaInfo->name, pMeterMeta, &pTableDataBlock); + pTableMetaInfo->name, pTableMeta, &pTableDataBlock); if (ret != TSDB_CODE_SUCCESS) { return -1; } @@ -1402,10 +1406,10 @@ static int tscInsertDataFromFile(SSqlObj *pSql, FILE *fp, char *tmpTokenBuf) { if (TSDB_CODE_SUCCESS != code) return -1; int count = 0; - SParsedDataColInfo spd = {.numOfCols = pMeterMeta->numOfColumns}; - SSchema * pSchema = tsGetSchema(pMeterMeta); + SParsedDataColInfo spd = {.numOfCols = tinfo.numOfColumns}; + SSchema * pSchema = tscGetTableSchema(pTableMeta); - tscSetAssignedColumnInfo(&spd, pSchema, 
pMeterMeta->numOfColumns); + tscSetAssignedColumnInfo(&spd, pSchema, tinfo.numOfColumns); while ((readLen = getline(&line, &n, fp)) != -1) { // line[--readLen] = '\0'; @@ -1415,7 +1419,7 @@ static int tscInsertDataFromFile(SSqlObj *pSql, FILE *fp, char *tmpTokenBuf) { char *lineptr = line; strtolower(line, line); - len = tsParseOneRowData(&lineptr, pTableDataBlock, pSchema, &spd, pCmd->payload, pMeterMeta->precision, &code, tmpTokenBuf); + len = tsParseOneRowData(&lineptr, pTableDataBlock, pSchema, &spd, pCmd->payload, tinfo.precision, &code, tmpTokenBuf); if (len <= 0 || pTableDataBlock->numOfParams > 0) { pSql->res.code = code; return (-code); @@ -1432,7 +1436,7 @@ static int tscInsertDataFromFile(SSqlObj *pSql, FILE *fp, char *tmpTokenBuf) { pTableDataBlock = pCmd->pDataBlocks->pData[0]; pTableDataBlock->size = sizeof(SShellSubmitBlock); - pTableDataBlock->rowSize = pMeterMeta->rowSize; + pTableDataBlock->rowSize = tinfo.rowSize; numOfRows += pSql->res.numOfRows; pSql->res.numOfRows = 0; @@ -1474,25 +1478,25 @@ void tscProcessMultiVnodesInsert(SSqlObj *pSql) { } STableDataBlocks *pDataBlock = NULL; - SMeterMetaInfo * pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); + STableMetaInfo * pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); assert(pCmd->numOfClause == 1); int32_t code = TSDB_CODE_SUCCESS; /* the first block has been sent to server in processSQL function */ - assert(pMeterMetaInfo->vnodeIndex >= 1 && pCmd->pDataBlocks != NULL); + assert(pTableMetaInfo->vnodeIndex >= 1 && pCmd->pDataBlocks != NULL); - if (pMeterMetaInfo->vnodeIndex < pCmd->pDataBlocks->nSize) { + if (pTableMetaInfo->vnodeIndex < pCmd->pDataBlocks->nSize) { SDataBlockList *pDataBlocks = pCmd->pDataBlocks; - for (int32_t i = pMeterMetaInfo->vnodeIndex; i < pDataBlocks->nSize; ++i) { + for (int32_t i = pTableMetaInfo->vnodeIndex; i < pDataBlocks->nSize; ++i) { pDataBlock = pDataBlocks->pData[i]; if (pDataBlock == NULL) { continue; } if ((code = 
tscCopyDataBlockToPayload(pSql, pDataBlock)) != TSDB_CODE_SUCCESS) { - tscTrace("%p build submit data block failed, vnodeIdx:%d, total:%d", pSql, pMeterMetaInfo->vnodeIndex, + tscTrace("%p build submit data block failed, vnodeIdx:%d, total:%d", pSql, pTableMetaInfo->vnodeIndex, pDataBlocks->nSize); continue; } @@ -1513,7 +1517,7 @@ void tscProcessMultiVnodesInsertFromFile(SSqlObj *pSql) { } SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); STableDataBlocks *pDataBlock = NULL; int32_t affected_rows = 0; @@ -1544,10 +1548,10 @@ void tscProcessMultiVnodesInsertFromFile(SSqlObj *pSql) { continue; } - strncpy(pMeterMetaInfo->name, pDataBlock->tableId, TSDB_TABLE_ID_LEN); + strncpy(pTableMetaInfo->name, pDataBlock->tableId, TSDB_TABLE_ID_LEN); memset(pDataBlock->pData, 0, pDataBlock->nAllocSize); - int32_t ret = tscGetMeterMeta(pSql, pMeterMetaInfo); + int32_t ret = tscGetTableMeta(pSql, pTableMetaInfo); if (ret != TSDB_CODE_SUCCESS) { tscError("%p get meter meta failed, abort", pSql); continue; diff --git a/src/client/src/tscPrepare.c b/src/client/src/tscPrepare.c index cb991691f5473cb7b8c528f017508d53df7b45b2..96215ce73c269c1a2ec0d99bb0d7a6a217b97396 100644 --- a/src/client/src/tscPrepare.c +++ b/src/client/src/tscPrepare.c @@ -15,7 +15,6 @@ #include "taos.h" #include "tsclient.h" -#include "tscSQLParser.h" #include "tscUtil.h" #include "ttimer.h" #include "taosmsg.h" @@ -408,8 +407,8 @@ static int insertStmtReset(STscStmt* pStmt) { } pCmd->batchSize = 0; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - pMeterMetaInfo->vnodeIndex = 0; + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + pTableMetaInfo->vnodeIndex = 0; return TSDB_CODE_SUCCESS; } @@ -422,7 +421,7 @@ static int insertStmtExecute(STscStmt* stmt) { ++pCmd->batchSize; } - 
SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); assert(pCmd->numOfClause == 1); if (pCmd->pDataBlocks->nSize > 0) { @@ -439,7 +438,7 @@ static int insertStmtExecute(STscStmt* stmt) { } // set the next sent data vnode index in data block arraylist - pMeterMetaInfo->vnodeIndex = 1; + pTableMetaInfo->vnodeIndex = 1; } else { pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); } diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index 25df797c04b0b85d9f5297b45c392e2898ebd70a..e0850a7139f72a7569560c8131d5c8c2ff69fa14 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -17,17 +17,19 @@ #define _DEFAULT_SOURCE #include "os.h" +#include "qast.h" #include "taos.h" #include "taosmsg.h" #include "tstoken.h" #include "tstrbuild.h" #include "ttime.h" -#include "tast.h" -#include "tscSQLParser.h" #include "tscUtil.h" #include "tschemautil.h" #include "tsclient.h" +#include "ttokendef.h" + +#include "name.h" #define DEFAULT_PRIMARY_TIMESTAMP_COL_NAME "_c0" @@ -59,7 +61,7 @@ static int32_t setObjFullName(char* fullName, const char* account, SSQLToken* pD static void getColumnName(tSQLExprItem* pItem, char* resultFieldName, int32_t nameLength); static void getRevisedName(char* resultFieldName, int32_t functionId, int32_t maxLen, char* columnName); -static int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprItem* pItem, bool isResultColumn); +static int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprItem* pItem, bool finalResult); static int32_t insertResultField(SQueryInfo* pQueryInfo, int32_t outputIndex, SColumnList* pIdList, int16_t bytes, int8_t type, char* fieldName, SSqlExpr* pSqlExpr); static int32_t changeFunctionID(int32_t optr, int16_t* functionId); @@ -106,7 +108,7 @@ static int32_t optrToString(tSQLExpr* pExpr, char** 
exprString); static int32_t getMeterIndex(SSQLToken* pTableToken, SQueryInfo* pQueryInfo, SColumnIndex* pIndex); static int32_t doFunctionsCompatibleCheck(SSqlCmd* pCmd, SQueryInfo* pQueryInfo); static int32_t doLocalQueryProcess(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql); -static int32_t tscCheckCreateDbParams(SSqlCmd* pCmd, SCreateDbMsg* pCreate); +static int32_t tscCheckCreateDbParams(SSqlCmd* pCmd, SCMCreateDbMsg* pCreate); static SColumnList getColumnList(int32_t num, int16_t tableIndex, int32_t columnIndex); @@ -149,10 +151,12 @@ static int setColumnFilterInfoForTimestamp(SQueryInfo* pQueryInfo, tVariant* pVa strdequote(pVar->pz); char* seg = strnchr(pVar->pz, '-', pVar->nLen, false); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + if (seg != NULL) { - if (taosParseTime(pVar->pz, &time, pVar->nLen, pMeterMetaInfo->pMeterMeta->precision) != TSDB_CODE_SUCCESS) { + if (taosParseTime(pVar->pz, &time, pVar->nLen, tinfo.precision) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, msg); } } else { @@ -207,7 +211,7 @@ int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo) { int32_t code = tscGetQueryInfoDetailSafely(pCmd, pCmd->clauseIndex, &pQueryInfo); assert(pQueryInfo->numOfTables == 0); - SMeterMetaInfo* pMeterMetaInfo = tscAddEmptyMeterMetaInfo(pQueryInfo); + STableMetaInfo* pTableMetaInfo = tscAddEmptyMetaInfo(pQueryInfo); pCmd->command = pInfo->type; @@ -229,7 +233,7 @@ int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo) { if (pInfo->type == TSDB_SQL_DROP_DB) { assert(pInfo->pDCLInfo->nTokens == 1); - code = setObjFullName(pMeterMetaInfo->name, getAccountId(pSql), pzName, NULL, NULL); + code = setObjFullName(pTableMetaInfo->name, getAccountId(pSql), pzName, NULL, NULL); if (code != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); 
} @@ -237,7 +241,7 @@ int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo) { } else if (pInfo->type == TSDB_SQL_DROP_TABLE) { assert(pInfo->pDCLInfo->nTokens == 1); - if (setMeterID(pMeterMetaInfo, pzName, pSql) != TSDB_CODE_SUCCESS) { + if (setMeterID(pTableMetaInfo, pzName, pSql) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg3); } } else if (pInfo->type == TSDB_SQL_DROP_DNODE) { @@ -245,13 +249,13 @@ int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } - strncpy(pMeterMetaInfo->name, pzName->z, pzName->n); + strncpy(pTableMetaInfo->name, pzName->z, pzName->n); } else { // drop user if (pzName->n > TSDB_USER_LEN) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg3); } - strncpy(pMeterMetaInfo->name, pzName->z, pzName->n); + strncpy(pTableMetaInfo->name, pzName->z, pzName->n); } break; @@ -265,7 +269,7 @@ int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg); } - int32_t ret = setObjFullName(pMeterMetaInfo->name, getAccountId(pSql), pToken, NULL, NULL); + int32_t ret = setObjFullName(pTableMetaInfo->name, getAccountId(pSql), pToken, NULL, NULL); if (ret != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg); } @@ -295,7 +299,7 @@ int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } - int32_t ret = setObjFullName(pMeterMetaInfo->name, getAccountId(pSql), &(pCreateDB->dbname), NULL, NULL); + int32_t ret = setObjFullName(pTableMetaInfo->name, getAccountId(pSql), &(pCreateDB->dbname), NULL, NULL); if (ret != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); } @@ -370,11 +374,11 @@ int32_t tscToSQLCmd(SSqlObj* pSql, struct SSqlInfo* pInfo) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); } - if (setMeterID(pMeterMetaInfo, pToken, pSql) != 
TSDB_CODE_SUCCESS) { + if (setMeterID(pTableMetaInfo, pToken, pSql) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); } - return tscGetMeterMeta(pSql, pMeterMetaInfo); + return tscGetTableMeta(pSql, pTableMetaInfo); } case TSDB_SQL_CFG_DNODE: { @@ -585,8 +589,9 @@ int32_t parseIntervalClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { const char* msg1 = "invalid query expression"; const char* msg2 = "interval cannot be less than 10 ms"; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + if (pQuerySql->interval.type == 0 || pQuerySql->interval.n == 0) { return TSDB_CODE_SUCCESS; } @@ -598,12 +603,12 @@ int32_t parseIntervalClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { } // if the unit of time window value is millisecond, change the value from microsecond - if (pMeterMetaInfo->pMeterMeta->precision == TSDB_TIME_PRECISION_MILLI) { + if (tinfo.precision == TSDB_TIME_PRECISION_MILLI) { pQueryInfo->intervalTime = pQueryInfo->intervalTime / 1000; } /* parser has filter the illegal type, no need to check here */ - pQueryInfo->intervalTimeUnit = pQuerySql->interval.z[pQuerySql->interval.n - 1]; + pQueryInfo->slidingTimeUnit = pQuerySql->interval.z[pQuerySql->interval.n - 1]; // interval cannot be less than 10 milliseconds if (pQueryInfo->intervalTime < tsMinIntervalTime) { @@ -643,8 +648,8 @@ int32_t parseIntervalClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { int32_t tableIndex = COLUMN_INDEX_INITIAL_VAL; for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); - if (pMeterMetaInfo->pMeterMeta->uid == uid) { + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, i); + if (pTableMetaInfo->pTableMeta->uid == uid) { tableIndex = i; break; } @@ -676,12 +681,13 @@ int32_t 
parseSlidingClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { const char* msg0 = "sliding value too small"; const char* msg1 = "sliding value no larger than the interval value"; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); SSQLToken* pSliding = &pQuerySql->sliding; + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); if (pSliding->n != 0) { getTimestampInUsFromStr(pSliding->z, pSliding->n, &pQueryInfo->slidingTime); - if (pMeterMetaInfo->pMeterMeta->precision == TSDB_TIME_PRECISION_MILLI) { + if (tinfo.precision == TSDB_TIME_PRECISION_MILLI) { pQueryInfo->slidingTime /= 1000; } @@ -699,27 +705,27 @@ int32_t parseSlidingClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { return TSDB_CODE_SUCCESS; } -int32_t setMeterID(SMeterMetaInfo* pMeterMetaInfo, SSQLToken* pzTableName, SSqlObj* pSql) { +int32_t setMeterID(STableMetaInfo* pTableMetaInfo, SSQLToken* pzTableName, SSqlObj* pSql) { const char* msg = "name too long"; SSqlCmd* pCmd = &pSql->cmd; int32_t code = TSDB_CODE_SUCCESS; - // backup the old name in pMeterMetaInfo - size_t size = strlen(pMeterMetaInfo->name); + // backup the old name in pTableMetaInfo + size_t size = strlen(pTableMetaInfo->name); char* oldName = NULL; if (size > 0) { - oldName = strdup(pMeterMetaInfo->name); + oldName = strdup(pTableMetaInfo->name); } if (hasSpecifyDB(pzTableName)) { // db has been specified in sql string so we ignore current db path - code = setObjFullName(pMeterMetaInfo->name, getAccountId(pSql), NULL, pzTableName, NULL); + code = setObjFullName(pTableMetaInfo->name, getAccountId(pSql), NULL, pzTableName, NULL); } else { // get current DB name first, then set it into path SSQLToken t = {0}; getCurrentDBName(pSql, &t); - code = setObjFullName(pMeterMetaInfo->name, NULL, &t, pzTableName, NULL); + code = setObjFullName(pTableMetaInfo->name, NULL, &t, pzTableName, NULL); } if (code != TSDB_CODE_SUCCESS) 
{ @@ -736,11 +742,11 @@ int32_t setMeterID(SMeterMetaInfo* pMeterMetaInfo, SSQLToken* pzTableName, SSqlO * that are corresponding to the old name for the new table name. */ if (size > 0) { - if (strncasecmp(oldName, pMeterMetaInfo->name, tListLen(pMeterMetaInfo->name)) != 0) { - tscClearMeterMetaInfo(pMeterMetaInfo, false); + if (strncasecmp(oldName, pTableMetaInfo->name, tListLen(pTableMetaInfo->name)) != 0) { + tscClearMeterMetaInfo(pTableMetaInfo, false); } } else { - assert(pMeterMetaInfo->pMeterMeta == NULL && pMeterMetaInfo->pMetricMeta == NULL); + assert(pTableMetaInfo->pTableMeta == NULL && pTableMetaInfo->pMetricMeta == NULL); } tfree(oldName); @@ -890,11 +896,14 @@ bool validateOneTags(SSqlCmd* pCmd, TAOS_FIELD* pTagField) { assert(pCmd->numOfClause == 1); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + int32_t numOfTags = tscGetNumOfTags(pTableMeta); + int32_t numOfCols = tscGetNumOfColumns(pTableMeta); + // no more than 6 tags - if (pMeterMeta->numOfTags == TSDB_MAX_TAGS) { + if (numOfTags == TSDB_MAX_TAGS) { char msg[128] = {0}; sprintf(msg, "tags no more than %d", TSDB_MAX_TAGS); @@ -913,10 +922,10 @@ bool validateOneTags(SSqlCmd* pCmd, TAOS_FIELD* pTagField) { return false; } - SSchema* pTagSchema = tsGetTagSchema(pMeterMetaInfo->pMeterMeta); + SSchema* pTagSchema = tscGetTableTagSchema(pTableMetaInfo->pTableMeta); int32_t nLen = 0; - for (int32_t i = 0; i < pMeterMeta->numOfTags; ++i) { + for (int32_t i = 0; i < numOfTags; ++i) { nLen += pTagSchema[i].bytes; } @@ -939,9 +948,9 @@ bool validateOneTags(SSqlCmd* pCmd, TAOS_FIELD* pTagField) { } // field name must be unique - SSchema* pSchema = tsGetSchema(pMeterMeta); + SSchema* pSchema = tscGetTableSchema(pTableMeta); - for (int32_t i = 0; i < pMeterMeta->numOfTags + 
pMeterMeta->numOfColumns; ++i) { + for (int32_t i = 0; i < numOfTags + numOfCols; ++i) { if (strncasecmp(pTagField->name, pSchema[i].name, TSDB_COL_NAME_LEN) == 0) { invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); return false; @@ -960,12 +969,14 @@ bool validateOneColumn(SSqlCmd* pCmd, TAOS_FIELD* pColField) { const char* msg6 = "invalid column length"; assert(pCmd->numOfClause == 1); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; - + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + + int32_t numOfTags = tscGetNumOfTags(pTableMeta); + int32_t numOfCols = tscGetNumOfColumns(pTableMeta); + // no more max columns - if (pMeterMeta->numOfColumns >= TSDB_MAX_COLUMNS || - pMeterMeta->numOfTags + pMeterMeta->numOfColumns >= TSDB_MAX_COLUMNS) { + if (numOfCols >= TSDB_MAX_COLUMNS || numOfTags + numOfCols >= TSDB_MAX_COLUMNS) { invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); return false; } @@ -980,10 +991,10 @@ bool validateOneColumn(SSqlCmd* pCmd, TAOS_FIELD* pColField) { return false; } - SSchema* pSchema = tsGetSchema(pMeterMeta); + SSchema* pSchema = tscGetTableSchema(pTableMeta); int32_t nLen = 0; - for (int32_t i = 0; i < pMeterMeta->numOfColumns; ++i) { + for (int32_t i = 0; i < numOfCols; ++i) { nLen += pSchema[i].bytes; } @@ -999,7 +1010,7 @@ bool validateOneColumn(SSqlCmd* pCmd, TAOS_FIELD* pColField) { } // field name must be unique - for (int32_t i = 0; i < pMeterMeta->numOfTags + pMeterMeta->numOfColumns; ++i) { + for (int32_t i = 0; i < numOfTags + numOfCols; ++i) { if (strncasecmp(pColField->name, pSchema[i].name, TSDB_COL_NAME_LEN) == 0) { invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); return false; @@ -1190,7 +1201,7 @@ int32_t parseSelectClause(SSqlCmd* pCmd, int32_t clauseIndex, tSQLExprList* pSel pFuncExpr->interResBytes = sizeof(double); 
pFuncExpr->resType = TSDB_DATA_TYPE_DOUBLE; - SSqlBinaryExprInfo* pBinExprInfo = &pFuncExpr->pBinExprInfo; + SSqlBinaryExprInfo* pBinExprInfo = &pFuncExpr->binExprInfo; tSQLSyntaxNode* pNode = NULL; SColIndexEx* pColIndex = NULL; @@ -1238,10 +1249,11 @@ int32_t parseSelectClause(SSqlCmd* pCmd, int32_t clauseIndex, tSQLExprList* pSel if (isSTable) { pQueryInfo->type |= TSDB_QUERY_TYPE_STABLE_QUERY; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + int32_t numOfCols = tscGetNumOfColumns(pTableMetaInfo->pTableMeta); + if (tscQueryMetricTags(pQueryInfo)) { // local handle the metric tag query - pCmd->count = pMeterMetaInfo->pMeterMeta->numOfColumns; // the number of meter schema, tricky. + pCmd->count = numOfCols; // the number of meter schema, tricky. pQueryInfo->command = TSDB_SQL_RETRIEVE_TAGS; } @@ -1272,11 +1284,11 @@ int32_t insertResultField(SQueryInfo* pQueryInfo, int32_t outputIndex, SColumnLi } SSqlExpr* doAddProjectCol(SQueryInfo* pQueryInfo, int32_t outputIndex, int32_t colIdx, int32_t tableIndex) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; - - SSchema* pSchema = tsGetColumnSchema(pMeterMeta, colIdx); - int32_t numOfCols = pMeterMeta->numOfColumns; + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + int32_t numOfCols = tscGetNumOfColumns(pTableMeta); + + SSchema* pSchema = tscGetTableColumnSchema(pTableMeta, colIdx); int16_t functionId = (int16_t)((colIdx >= numOfCols) ? 
TSDB_FUNC_TAGPRJ : TSDB_FUNC_PRJ); @@ -1295,39 +1307,39 @@ SSqlExpr* doAddProjectCol(SQueryInfo* pQueryInfo, int32_t outputIndex, int32_t c } void addRequiredTagColumn(SQueryInfo* pQueryInfo, int32_t tagColIndex, int32_t tableIndex) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); - if (pMeterMetaInfo->numOfTags == 0 || pMeterMetaInfo->tagColumnIndex[pMeterMetaInfo->numOfTags - 1] < tagColIndex) { - pMeterMetaInfo->tagColumnIndex[pMeterMetaInfo->numOfTags++] = tagColIndex; + if (pTableMetaInfo->numOfTags == 0 || pTableMetaInfo->tagColumnIndex[pTableMetaInfo->numOfTags - 1] < tagColIndex) { + pTableMetaInfo->tagColumnIndex[pTableMetaInfo->numOfTags++] = tagColIndex; } else { // find the appropriate position - for (int32_t i = 0; i < pMeterMetaInfo->numOfTags; ++i) { - if (tagColIndex > pMeterMetaInfo->tagColumnIndex[i]) { + for (int32_t i = 0; i < pTableMetaInfo->numOfTags; ++i) { + if (tagColIndex > pTableMetaInfo->tagColumnIndex[i]) { continue; - } else if (tagColIndex == pMeterMetaInfo->tagColumnIndex[i]) { + } else if (tagColIndex == pTableMetaInfo->tagColumnIndex[i]) { break; } else { - memmove(&pMeterMetaInfo->tagColumnIndex[i + 1], &pMeterMetaInfo->tagColumnIndex[i], - sizeof(pMeterMetaInfo->tagColumnIndex[0]) * (pMeterMetaInfo->numOfTags - i)); + memmove(&pTableMetaInfo->tagColumnIndex[i + 1], &pTableMetaInfo->tagColumnIndex[i], + sizeof(pTableMetaInfo->tagColumnIndex[0]) * (pTableMetaInfo->numOfTags - i)); - pMeterMetaInfo->tagColumnIndex[i] = tagColIndex; + pTableMetaInfo->tagColumnIndex[i] = tagColIndex; - pMeterMetaInfo->numOfTags++; + pTableMetaInfo->numOfTags++; break; } } } // plus one means tbname - assert(tagColIndex >= -1 && tagColIndex < TSDB_MAX_TAGS && pMeterMetaInfo->numOfTags <= TSDB_MAX_TAGS + 1); + assert(tagColIndex >= -1 && tagColIndex < TSDB_MAX_TAGS && pTableMetaInfo->numOfTags <= TSDB_MAX_TAGS + 1); } static void 
addProjectQueryCol(SQueryInfo* pQueryInfo, int32_t startPos, SColumnIndex* pIndex, tSQLExprItem* pItem) { SSqlExpr* pExpr = doAddProjectCol(pQueryInfo, startPos, pIndex->columnIndex, pIndex->tableIndex); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, pIndex->tableIndex); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; - - SSchema* pSchema = tsGetColumnSchema(pMeterMeta, pIndex->columnIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, pIndex->tableIndex); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + + SSchema* pSchema = tscGetTableColumnSchema(pTableMeta, pIndex->columnIndex); char* colName = (pItem->aliasName == NULL) ? pSchema->name : pItem->aliasName; strncpy(pExpr->aliasName, colName, tListLen(pExpr->aliasName)); @@ -1336,7 +1348,7 @@ static void addProjectQueryCol(SQueryInfo* pQueryInfo, int32_t startPos, SColumn ids.num = 1; ids.ids[0] = *pIndex; - if (pIndex->columnIndex >= pMeterMeta->numOfColumns || pIndex->columnIndex == TSDB_TBNAME_COLUMN_INDEX) { + if (pIndex->columnIndex >= tscGetNumOfColumns(pTableMeta) || pIndex->columnIndex == TSDB_TBNAME_COLUMN_INDEX) { ids.num = 0; } @@ -1362,16 +1374,18 @@ void tscAddSpecialColumnForSelect(SQueryInfo* pQueryInfo, int32_t outputColIndex } static int32_t doAddProjectionExprAndResultFields(SQueryInfo* pQueryInfo, SColumnIndex* pIndex, int32_t startPos) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, pIndex->tableIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, pIndex->tableIndex); int32_t numOfTotalColumns = 0; - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; - SSchema* pSchema = tsGetSchema(pMeterMeta); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + SSchema* pSchema = tscGetTableSchema(pTableMeta); - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { - numOfTotalColumns = pMeterMeta->numOfColumns + pMeterMeta->numOfTags; + STableComInfo tinfo = tscGetTableInfo(pTableMeta); + + if 
(UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { + numOfTotalColumns = tinfo.numOfColumns + tinfo.numOfTags; } else { - numOfTotalColumns = pMeterMeta->numOfColumns; + numOfTotalColumns = tinfo.numOfColumns; } for (int32_t j = 0; j < numOfTotalColumns; ++j) { @@ -1383,7 +1397,7 @@ static int32_t doAddProjectionExprAndResultFields(SQueryInfo* pQueryInfo, SColum ids.ids[0] = *pIndex; // tag columns do not add to source list - ids.num = (j >= pMeterMeta->numOfColumns) ? 0 : 1; + ids.num = (j >= tscGetNumOfColumns(pTableMeta)) ? 0 : 1; insertResultField(pQueryInfo, startPos + j, &ids, pSchema[j].bytes, pSchema[j].type, pSchema[j].name, pExpr); } @@ -1427,10 +1441,10 @@ int32_t addProjectionExprAndResultField(SQueryInfo* pQueryInfo, tSQLExprItem* pI pQueryInfo->type = TSDB_QUERY_TYPE_STABLE_QUERY; tscAddSpecialColumnForSelect(pQueryInfo, startPos, TSDB_FUNC_TAGPRJ, &index, &colSchema, true); } else { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; - if (index.columnIndex >= pMeterMeta->numOfColumns && UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { + if (index.columnIndex >= tscGetNumOfColumns(pTableMeta) && UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } @@ -1488,7 +1502,7 @@ static int32_t setExprInfoForFunctions(SQueryInfo* pQueryInfo, SSchema* pSchema, } int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprItem* pItem, bool finalResult) { - SMeterMetaInfo* pMeterMetaInfo = NULL; + STableMetaInfo* pTableMetaInfo = NULL; int32_t optr = pItem->pNode->nSQLOptr; const char* msg1 = "not support column types"; @@ -1538,10 +1552,10 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt return invalidSqlErrMsg(pQueryInfo->msg, msg3); } - 
pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); // count tag is equalled to count(tbname) - if (index.columnIndex >= pMeterMetaInfo->pMeterMeta->numOfColumns) { + if (index.columnIndex >= tscGetNumOfColumns(pTableMetaInfo->pTableMeta)) { index.columnIndex = TSDB_TBNAME_COLUMN_INDEX; } @@ -1603,8 +1617,8 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt } // 2. check if sql function can be applied on this column data type - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - SSchema* pSchema = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, index.columnIndex); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + SSchema* pSchema = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, index.columnIndex); int16_t colType = pSchema->type; if (colType <= TSDB_DATA_TYPE_BOOL || colType >= TSDB_DATA_TYPE_BINARY) { @@ -1637,7 +1651,7 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt } // functions can not be applied to tags - if (index.columnIndex >= pMeterMetaInfo->pMeterMeta->numOfColumns) { + if (index.columnIndex >= tscGetNumOfColumns(pTableMetaInfo->pTableMeta)) { return invalidSqlErrMsg(pQueryInfo->msg, msg6); } @@ -1711,10 +1725,10 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt return invalidSqlErrMsg(pQueryInfo->msg, msg4); } - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); - for (int32_t j = 0; j < pMeterMetaInfo->pMeterMeta->numOfColumns; ++j) { + for (int32_t j = 0; j < tscGetNumOfColumns(pTableMetaInfo->pTableMeta); ++j) { index.columnIndex = j; if (setExprInfoForFunctions(pQueryInfo, pSchema, 
functionID, pItem->aliasName, colIdx++, &index) != 0) { return TSDB_CODE_INVALID_SQL; @@ -1726,11 +1740,11 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt return invalidSqlErrMsg(pQueryInfo->msg, msg3); } - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); // functions can not be applied to tags - if ((index.columnIndex >= pMeterMetaInfo->pMeterMeta->numOfColumns) || (index.columnIndex < 0)) { + if ((index.columnIndex >= tscGetNumOfColumns(pTableMetaInfo->pTableMeta)) || (index.columnIndex < 0)) { return invalidSqlErrMsg(pQueryInfo->msg, msg6); } @@ -1745,10 +1759,10 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt int32_t numOfFields = 0; for (int32_t j = 0; j < pQueryInfo->numOfTables; ++j) { - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, j); - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, j); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); - for (int32_t i = 0; i < pMeterMetaInfo->pMeterMeta->numOfColumns; ++i) { + for (int32_t i = 0; i < tscGetNumOfColumns(pTableMetaInfo->pTableMeta); ++i) { SColumnIndex index = {.tableIndex = j, .columnIndex = i}; if (setExprInfoForFunctions(pQueryInfo, pSchema, functionID, pItem->aliasName, colIdx + i + j, &index) != 0) { @@ -1756,7 +1770,7 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt } } - numOfFields += pMeterMetaInfo->pMeterMeta->numOfColumns; + numOfFields += tscGetNumOfColumns(pTableMetaInfo->pTableMeta); } return TSDB_CODE_SUCCESS; @@ -1782,11 +1796,11 @@ int32_t addExprAndResultField(SQueryInfo* pQueryInfo, int32_t colIdx, tSQLExprIt return invalidSqlErrMsg(pQueryInfo->msg, msg3); } - 
pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); // functions can not be applied to tags - if (index.columnIndex >= pMeterMetaInfo->pMeterMeta->numOfColumns) { + if (index.columnIndex >= tscGetNumOfColumns(pTableMetaInfo->pTableMeta)) { return invalidSqlErrMsg(pQueryInfo->msg, msg6); } @@ -1916,10 +1930,10 @@ static bool isTablenameToken(SSQLToken* token) { } static int16_t doGetColumnIndex(SQueryInfo* pQueryInfo, int32_t index, SSQLToken* pToken) { - STableMeta* pMeterMeta = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index)->pMeterMeta; + STableMeta* pTableMeta = tscGetMetaInfo(pQueryInfo, index)->pTableMeta; - int32_t numOfCols = pMeterMeta->numOfColumns + pMeterMeta->numOfTags; - SSchema* pSchema = tsGetSchema(pMeterMeta); + int32_t numOfCols = tscGetNumOfColumns(pTableMeta) + tscGetNumOfTags(pTableMeta); + SSchema* pSchema = tscGetTableSchema(pTableMeta); int16_t columnIndex = COLUMN_INDEX_INITIAL_VAL; @@ -1991,8 +2005,8 @@ int32_t getMeterIndex(SSQLToken* pTableToken, SQueryInfo* pQueryInfo, SColumnInd char tableName[TSDB_TABLE_ID_LEN + 1] = {0}; for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); - extractTableName(pMeterMetaInfo->name, tableName); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, i); + extractTableName(pTableMetaInfo->name, tableName); if (strncasecmp(tableName, pTableToken->z, pTableToken->n) == 0 && strlen(tableName) == pTableToken->n) { pIndex->tableIndex = i; @@ -2019,7 +2033,7 @@ int32_t getTableIndexByName(SSQLToken* pToken, SQueryInfo* pQueryInfo, SColumnIn } int32_t getColumnIndexByName(SSQLToken* pToken, SQueryInfo* pQueryInfo, SColumnIndex* pIndex) { - if (pQueryInfo->pMeterInfo == NULL || 
pQueryInfo->numOfTables == 0) { + if (pQueryInfo->pTableMetaInfo == NULL || pQueryInfo->numOfTables == 0) { return TSDB_CODE_INVALID_SQL; } @@ -2115,7 +2129,7 @@ int32_t changeFunctionID(int32_t optr, int16_t* functionId) { int32_t setShowInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { SSqlCmd* pCmd = &pSql->cmd; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); assert(pCmd->numOfClause == 1); pCmd->command = TSDB_SQL_SHOW; @@ -2151,7 +2165,7 @@ int32_t setShowInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } - int32_t ret = setObjFullName(pMeterMetaInfo->name, getAccountId(pSql), pDbPrefixToken, NULL, NULL); + int32_t ret = setObjFullName(pTableMetaInfo->name, getAccountId(pSql), pDbPrefixToken, NULL, NULL); if (ret != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } @@ -2235,13 +2249,13 @@ bool validateIpAddress(const char* ip, size_t size) { } int32_t tscTansformSQLFunctionForSTableQuery(SQueryInfo* pQueryInfo) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - if (pMeterMetaInfo->pMeterMeta == NULL || !UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (pTableMetaInfo->pTableMeta == NULL || !UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { return TSDB_CODE_INVALID_SQL; } - assert(pMeterMetaInfo->pMeterMeta->numOfTags >= 0); + assert(tscGetNumOfTags(pTableMetaInfo->pTableMeta) >= 0); int16_t bytes = 0; int16_t type = 0; @@ -2252,7 +2266,7 @@ int32_t tscTansformSQLFunctionForSTableQuery(SQueryInfo* pQueryInfo) { int16_t functionId = aAggs[pExpr->functionId].stableFuncId; int32_t colIndex = pExpr->colInfo.colIdx; - SSchema* pSrcSchema = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, colIndex); + SSchema* pSrcSchema = 
tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, colIndex); if ((functionId >= TSDB_FUNC_SUM && functionId <= TSDB_FUNC_TWA) || (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) || @@ -2274,14 +2288,14 @@ int32_t tscTansformSQLFunctionForSTableQuery(SQueryInfo* pQueryInfo) { /* transfer the field-info back to original input format */ void tscRestoreSQLFunctionForMetricQuery(SQueryInfo* pQueryInfo) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - if (!UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + if (!UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { return; } for (int32_t i = 0; i < pQueryInfo->exprsInfo.numOfExprs; ++i) { SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i); - SSchema* pSchema = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, pExpr->colInfo.colIdx); + SSchema* pSchema = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIdx); // if (/*(pExpr->functionId >= TSDB_FUNC_FIRST_DST && pExpr->functionId <= TSDB_FUNC_LAST_DST) || // (pExpr->functionId >= TSDB_FUNC_SUM && pExpr->functionId <= TSDB_FUNC_MAX) || @@ -2371,7 +2385,7 @@ static bool functionCompatibleCheck(SQueryInfo* pQueryInfo) { } void updateTagColumnIndex(SQueryInfo* pQueryInfo, int32_t tableIndex) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); /* * update tags column index for group by tags @@ -2381,8 +2395,8 @@ void updateTagColumnIndex(SQueryInfo* pQueryInfo, int32_t tableIndex) { for (int32_t i = 0; i < pQueryInfo->groupbyExpr.numOfGroupCols; ++i) { int32_t index = pQueryInfo->groupbyExpr.columnInfo[i].colIdx; - for (int32_t j = 0; j < pMeterMetaInfo->numOfTags; ++j) { - int32_t tagColIndex = pMeterMetaInfo->tagColumnIndex[j]; + for (int32_t j = 0; j < pTableMetaInfo->numOfTags; ++j) { + int32_t tagColIndex = 
pTableMetaInfo->tagColumnIndex[j]; if (tagColIndex == index) { pQueryInfo->groupbyExpr.columnInfo[i].colIdx = j; break; @@ -2400,12 +2414,12 @@ void updateTagColumnIndex(SQueryInfo* pQueryInfo, int32_t tableIndex) { } // not belongs to this table - if (pExpr->uid != pMeterMetaInfo->pMeterMeta->uid) { + if (pExpr->uid != pTableMetaInfo->pTableMeta->uid) { continue; } - for (int32_t j = 0; j < pMeterMetaInfo->numOfTags; ++j) { - if (pExpr->colInfo.colIdx == pMeterMetaInfo->tagColumnIndex[j]) { + for (int32_t j = 0; j < pTableMetaInfo->numOfTags; ++j) { + if (pExpr->colInfo.colIdx == pTableMetaInfo->tagColumnIndex[j]) { pExpr->colInfo.colIdx = j; break; } @@ -2421,17 +2435,17 @@ void updateTagColumnIndex(SQueryInfo* pQueryInfo, int32_t tableIndex) { assert(pJoinInfo->left.uid != pJoinInfo->right.uid); // the join condition expression node belongs to this table(super table) - if (pMeterMetaInfo->pMeterMeta->uid == pJoinInfo->left.uid) { - for (int32_t i = 0; i < pMeterMetaInfo->numOfTags; ++i) { - if (pJoinInfo->left.tagCol == pMeterMetaInfo->tagColumnIndex[i]) { + if (pTableMetaInfo->pTableMeta->uid == pJoinInfo->left.uid) { + for (int32_t i = 0; i < pTableMetaInfo->numOfTags; ++i) { + if (pJoinInfo->left.tagCol == pTableMetaInfo->tagColumnIndex[i]) { pJoinInfo->left.tagCol = i; } } } - if (pMeterMetaInfo->pMeterMeta->uid == pJoinInfo->right.uid) { - for (int32_t i = 0; i < pMeterMetaInfo->numOfTags; ++i) { - if (pJoinInfo->right.tagCol == pMeterMetaInfo->tagColumnIndex[i]) { + if (pTableMetaInfo->pTableMeta->uid == pJoinInfo->right.uid) { + for (int32_t i = 0; i < pTableMetaInfo->numOfTags; ++i) { + if (pJoinInfo->right.tagCol == pTableMetaInfo->tagColumnIndex[i]) { pJoinInfo->right.tagCol = i; } } @@ -2447,7 +2461,7 @@ int32_t parseGroupbyClause(SQueryInfo* pQueryInfo, tVariantList* pList, SSqlCmd* const char* msg9 = "tags not allowed for table query"; // todo : handle two meter situation - SMeterMetaInfo* pMeterMetaInfo = NULL; + STableMetaInfo* pTableMetaInfo = 
NULL; if (pList == NULL) { return TSDB_CODE_SUCCESS; @@ -2458,9 +2472,9 @@ int32_t parseGroupbyClause(SQueryInfo* pQueryInfo, tVariantList* pList, SSqlCmd* return invalidSqlErrMsg(pQueryInfo->msg, msg1); } - STableMeta* pMeterMeta = NULL; + STableMeta* pTableMeta = NULL; SSchema* pSchema = NULL; - SSchema s = tsGetTbnameColumnSchema(); + SSchema s = tscGetTbnameColumnSchema(); int32_t tableIndex = COLUMN_INDEX_INITIAL_VAL; @@ -2480,28 +2494,28 @@ int32_t parseGroupbyClause(SQueryInfo* pQueryInfo, tVariantList* pList, SSqlCmd* tableIndex = index.tableIndex; - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - pMeterMeta = pMeterMetaInfo->pMeterMeta; + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + pTableMeta = pTableMetaInfo->pTableMeta; if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) { pSchema = &s; } else { - pSchema = tsGetColumnSchema(pMeterMeta, index.columnIndex); + pSchema = tscGetTableColumnSchema(pTableMeta, index.columnIndex); } bool groupTag = false; - if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX || index.columnIndex >= pMeterMeta->numOfColumns) { + if (index.columnIndex == TSDB_TBNAME_COLUMN_INDEX || index.columnIndex >= tscGetNumOfColumns(pTableMeta)) { groupTag = true; } if (groupTag) { - if (!UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (!UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { return invalidSqlErrMsg(pQueryInfo->msg, msg9); } int32_t relIndex = index.columnIndex; if (index.columnIndex != TSDB_TBNAME_COLUMN_INDEX) { - relIndex -= pMeterMeta->numOfColumns; + relIndex -= tscGetNumOfColumns(pTableMeta); } pQueryInfo->groupbyExpr.columnInfo[i] = @@ -2561,9 +2575,9 @@ static int32_t doExtractColumnFilterInfo(SQueryInfo* pQueryInfo, SColumnFilterIn const char* msg = "not supported filter condition"; tSQLExpr* pRight = pExpr->pRight; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, columnIndex->tableIndex); + STableMetaInfo* pTableMetaInfo = 
tscGetMetaInfo(pQueryInfo, columnIndex->tableIndex); - SSchema* pSchema = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, columnIndex->columnIndex); + SSchema* pSchema = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, columnIndex->columnIndex); int16_t colType = pSchema->type; if (colType >= TSDB_DATA_TYPE_TINYINT && colType <= TSDB_DATA_TYPE_BIGINT) { @@ -2806,10 +2820,10 @@ enum { }; static int32_t extractColumnFilterInfo(SQueryInfo* pQueryInfo, SColumnIndex* pIndex, tSQLExpr* pExpr, int32_t sqlOptr) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, pIndex->tableIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, pIndex->tableIndex); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; - SSchema* pSchema = tsGetColumnSchema(pMeterMeta, pIndex->columnIndex); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; + SSchema* pSchema = tscGetTableColumnSchema(pTableMeta, pIndex->columnIndex); const char* msg1 = "non binary column not support like operator"; const char* msg2 = "binary column not support this operator"; @@ -2966,24 +2980,24 @@ static int32_t getJoinCondInfo(SQueryInfo* pQueryInfo, tSQLExpr* pExpr) { return TSDB_CODE_INVALID_SQL; } - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - int16_t tagColIndex = index.columnIndex - pMeterMetaInfo->pMeterMeta->numOfColumns; + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + int16_t tagColIndex = index.columnIndex - tscGetNumOfColumns(pTableMetaInfo->pTableMeta); - pLeft->uid = pMeterMetaInfo->pMeterMeta->uid; + pLeft->uid = pTableMetaInfo->pTableMeta->uid; pLeft->tagCol = tagColIndex; - strcpy(pLeft->tableId, pMeterMetaInfo->name); + strcpy(pLeft->tableId, pTableMetaInfo->name); index = (SColumnIndex)COLUMN_INDEX_INITIALIZER; if (getColumnIndexByName(&pExpr->pRight->colInfo, pQueryInfo, &index) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; } - pMeterMetaInfo = 
tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - tagColIndex = index.columnIndex - pMeterMetaInfo->pMeterMeta->numOfColumns; + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + tagColIndex = index.columnIndex - tscGetNumOfColumns(pTableMetaInfo->pTableMeta); - pRight->uid = pMeterMetaInfo->pMeterMeta->uid; + pRight->uid = pTableMetaInfo->pTableMeta->uid; pRight->tagCol = tagColIndex; - strcpy(pRight->tableId, pMeterMetaInfo->name); + strcpy(pRight->tableId, pTableMetaInfo->name); pTagCond->joinInfo.hasJoin = true; return TSDB_CODE_SUCCESS; @@ -3035,8 +3049,9 @@ static int32_t validateSQLExpr(tSQLExpr* pExpr, SQueryInfo* pQueryInfo, SColumnL } // if column is timestamp, bool, binary, nchar, not support arithmetic, so return invalid sql - STableMeta* pMeterMeta = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex)->pMeterMeta; - SSchema* pSchema = tsGetSchema(pMeterMeta) + index.columnIndex; + STableMeta* pTableMeta = tscGetMetaInfo(pQueryInfo, index.tableIndex)->pTableMeta; + SSchema* pSchema = tscGetTableSchema(pTableMeta) + index.columnIndex; + if ((pSchema->type == TSDB_DATA_TYPE_TIMESTAMP) || (pSchema->type == TSDB_DATA_TYPE_BOOL) || (pSchema->type == TSDB_DATA_TYPE_BINARY) || (pSchema->type == TSDB_DATA_TYPE_NCHAR)) { return TSDB_CODE_INVALID_SQL; @@ -3183,12 +3198,12 @@ static bool validateJoinExprNode(SQueryInfo* pQueryInfo, tSQLExpr* pExpr, SColum } // todo extract function - SMeterMetaInfo* pLeftMeterMeta = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, pLeftIndex->tableIndex); - SSchema* pLeftSchema = tsGetSchema(pLeftMeterMeta->pMeterMeta); + STableMetaInfo* pLeftMeterMeta = tscGetMetaInfo(pQueryInfo, pLeftIndex->tableIndex); + SSchema* pLeftSchema = tscGetTableSchema(pLeftMeterMeta->pTableMeta); int16_t leftType = pLeftSchema[pLeftIndex->columnIndex].type; - SMeterMetaInfo* pRightMeterMeta = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, rightIndex.tableIndex); - SSchema* pRightSchema = 
tsGetSchema(pRightMeterMeta->pMeterMeta); + STableMetaInfo* pRightMeterMeta = tscGetMetaInfo(pQueryInfo, rightIndex.tableIndex); + SSchema* pRightSchema = tscGetTableSchema(pRightMeterMeta->pTableMeta); int16_t rightType = pRightSchema[rightIndex.columnIndex].type; if (leftType != rightType) { @@ -3203,7 +3218,7 @@ static bool validateJoinExprNode(SQueryInfo* pQueryInfo, tSQLExpr* pExpr, SColum } // table to table/ super table to super table are allowed - if (UTIL_METER_IS_SUPERTABLE(pLeftMeterMeta) != UTIL_METER_IS_SUPERTABLE(pRightMeterMeta)) { + if (UTIL_TABLE_IS_SUPERTABLE(pLeftMeterMeta) != UTIL_TABLE_IS_SUPERTABLE(pRightMeterMeta)) { invalidSqlErrMsg(pQueryInfo->msg, msg5); return false; } @@ -3259,8 +3274,8 @@ static int32_t handleExprInQueryCond(SQueryInfo* pQueryInfo, tSQLExpr** pExpr, S assert(isExprDirectParentOfLeaftNode(*pExpr)); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; if (index.columnIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX) { // query on time range if (!validateJoinExprNode(pQueryInfo, *pExpr, &index)) { @@ -3283,10 +3298,10 @@ static int32_t handleExprInQueryCond(SQueryInfo* pQueryInfo, tSQLExpr** pExpr, S *pExpr = NULL; // remove this expression *type = TSQL_EXPR_TS; - } else if (index.columnIndex >= pMeterMeta->numOfColumns || + } else if (index.columnIndex >= tscGetNumOfColumns(pTableMeta) || index.columnIndex == TSDB_TBNAME_COLUMN_INDEX) { // query on tags // check for tag query condition - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } @@ -3296,7 +3311,7 @@ static int32_t handleExprInQueryCond(SQueryInfo* pQueryInfo, tSQLExpr** pExpr, S return TSDB_CODE_INVALID_SQL; } - SSchema* pSchema = 
tsGetSchema(pMeterMetaInfo->pMeterMeta); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); if ((!isTablenameToken(&pLeft->colInfo)) && pSchema[index.columnIndex].type != TSDB_DATA_TYPE_BINARY && pSchema[index.columnIndex].type != TSDB_DATA_TYPE_NCHAR) { @@ -3494,10 +3509,10 @@ static int32_t setTableCondForMetricQuery(SQueryInfo* pQueryInfo, const char* ac return TSDB_CODE_SUCCESS; } - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableCondIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableCondIndex); STagCond* pTagCond = &pQueryInfo->tagCond; - pTagCond->tbnameCond.uid = pMeterMetaInfo->pMeterMeta->uid; + pTagCond->tbnameCond.uid = pTableMetaInfo->pTableMeta->uid; assert(pExpr->nSQLOptr == TK_LIKE || pExpr->nSQLOptr == TK_IN); @@ -3527,8 +3542,9 @@ static int32_t setTableCondForMetricQuery(SQueryInfo* pQueryInfo, const char* ac } num = j; - SSQLToken dbToken = extractDBName(pMeterMetaInfo->name, db); - + char* name = extractDBName(pTableMetaInfo->name, db); + SSQLToken dbToken = {.type = TK_STRING, .z = name, .n = strlen(name)}; + for (int32_t i = 0; i < num; ++i) { if (i >= 1) { taosStringBuilderAppendStringLen(&sb1, TBNAME_LIST_SEP, 1); @@ -3604,15 +3620,15 @@ static int32_t getTimeRangeFromExpr(SQueryInfo* pQueryInfo, tSQLExpr* pExpr) { return TSDB_CODE_INVALID_SQL; } - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; - + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + tSQLExpr* pRight = pExpr->pRight; TSKEY stime = 0; TSKEY etime = INT64_MAX; - if (getTimeRange(&stime, &etime, pRight, pExpr->nSQLOptr, pMeterMeta->precision) != TSDB_CODE_SUCCESS) { + if (getTimeRange(&stime, &etime, pRight, pExpr->nSQLOptr, tinfo.precision) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, 
msg0); } @@ -3642,8 +3658,8 @@ static int32_t validateJoinExpr(SQueryInfo* pQueryInfo, SCondExpr* pCondExpr) { } } - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { // for stable join, tag columns + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { // for stable join, tag columns // must be present for join if (pCondExpr->pJoinExpr == NULL) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); @@ -3680,20 +3696,20 @@ static void cleanQueryExpr(SCondExpr* pCondExpr) { } static void doAddJoinTagsColumnsIntoTagList(SQueryInfo* pQueryInfo, SCondExpr* pCondExpr) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - if (QUERY_IS_JOIN_QUERY(pQueryInfo->type) && UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + if (QUERY_IS_JOIN_QUERY(pQueryInfo->type) && UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { SColumnIndex index = {0}; getColumnIndexByName(&pCondExpr->pJoinExpr->pLeft->colInfo, pQueryInfo, &index); - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); - int32_t columnInfo = index.columnIndex - pMeterMetaInfo->pMeterMeta->numOfColumns; + int32_t columnInfo = index.columnIndex - tscGetNumOfColumns(pTableMetaInfo->pTableMeta); addRequiredTagColumn(pQueryInfo, columnInfo, index.tableIndex); getColumnIndexByName(&pCondExpr->pJoinExpr->pRight->colInfo, pQueryInfo, &index); - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index.tableIndex); + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index.tableIndex); - columnInfo = index.columnIndex - pMeterMetaInfo->pMeterMeta->numOfColumns; + columnInfo = index.columnIndex - tscGetNumOfColumns(pTableMetaInfo->pTableMeta); addRequiredTagColumn(pQueryInfo, columnInfo, index.tableIndex); } } @@ 
-3705,7 +3721,7 @@ static int32_t getTagQueryCondExpr(SQueryInfo* pQueryInfo, SCondExpr* pCondExpr, for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { tSQLExpr* p1 = extractExprForSTable(pExpr, pQueryInfo, i); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, i); char c[TSDB_MAX_TAGS_LEN] = {0}; char* str = c; @@ -3714,7 +3730,7 @@ static int32_t getTagQueryCondExpr(SQueryInfo* pQueryInfo, SCondExpr* pCondExpr, return ret; } - tsSetMetricQueryCond(&pQueryInfo->tagCond, pMeterMetaInfo->pMeterMeta->uid, c); + tsSetMetricQueryCond(&pQueryInfo->tagCond, pTableMetaInfo->pTableMeta->uid, c); doCompactQueryExpr(pExpr); tSQLExprDestroy(p1); @@ -4019,7 +4035,7 @@ int32_t parseFillClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySQL) { static void setDefaultOrderInfo(SQueryInfo* pQueryInfo) { /* set default timestamp order information for all queries */ pQueryInfo->order.order = TSQL_SO_ASC; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); if (isTopBottomQuery(pQueryInfo)) { pQueryInfo->order.order = TSQL_SO_ASC; @@ -4029,7 +4045,7 @@ static void setDefaultOrderInfo(SQueryInfo* pQueryInfo) { } /* for metric query, set default ascending order for group output */ - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { pQueryInfo->groupbyExpr.orderType = TSQL_SO_ASC; } } @@ -4041,7 +4057,7 @@ int32_t parseOrderbyClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql, SSchema const char* msg3 = "only support order by primary timestamp and first tag in groupby clause"; setDefaultOrderInfo(pQueryInfo); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); if (pQuerySql->pSortOrder == NULL) { return TSDB_CODE_SUCCESS; @@ -4055,7 +4071,7 @@ int32_t 
parseOrderbyClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql, SSchema * * for super table query, the order option must be less than 3. */ - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { if (pSortorder->nExpr > 1) { return invalidSqlErrMsg(pQueryInfo->msg, msg0); } @@ -4076,7 +4092,7 @@ int32_t parseOrderbyClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql, SSchema SSQLToken columnName = {pVar->nLen, pVar->nType, pVar->pz}; SColumnIndex index = {0}; - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { // metric query + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { // metric query if (getColumnIndexByName(&columnName, pQueryInfo, &index) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } @@ -4084,8 +4100,8 @@ int32_t parseOrderbyClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql, SSchema bool orderByTags = false; bool orderByTS = false; - if (index.columnIndex >= pMeterMetaInfo->pMeterMeta->numOfColumns) { - int32_t relTagIndex = index.columnIndex - pMeterMetaInfo->pMeterMeta->numOfColumns; + if (index.columnIndex >= tscGetNumOfColumns(pTableMetaInfo->pTableMeta)) { + int32_t relTagIndex = index.columnIndex - tscGetNumOfColumns(pTableMetaInfo->pTableMeta); if (relTagIndex == pQueryInfo->groupbyExpr.columnInfo[0].colIdx) { orderByTags = true; } @@ -4105,7 +4121,7 @@ int32_t parseOrderbyClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql, SSchema if (pSortorder->nExpr == 1) { if (orderByTags) { - pQueryInfo->groupbyExpr.orderIndex = index.columnIndex - pMeterMetaInfo->pMeterMeta->numOfColumns; + pQueryInfo->groupbyExpr.orderIndex = index.columnIndex - tscGetNumOfColumns(pTableMetaInfo->pTableMeta); pQueryInfo->groupbyExpr.orderType = pQuerySql->pSortOrder->a[0].sortOrder; } else if (isTopBottomQuery(pQueryInfo)) { /* order of top/bottom query in interval is not valid */ @@ -4128,7 +4144,7 @@ int32_t parseOrderbyClause(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql, SSchema if 
(pSortorder->nExpr == 2) { if (orderByTags) { - pQueryInfo->groupbyExpr.orderIndex = index.columnIndex - pMeterMetaInfo->pMeterMeta->numOfColumns; + pQueryInfo->groupbyExpr.orderIndex = index.columnIndex - tscGetNumOfColumns(pTableMetaInfo->pTableMeta); pQueryInfo->groupbyExpr.orderType = pQuerySql->pSortOrder->a[0].sortOrder; } else { pQueryInfo->order.order = pSortorder->a[0].sortOrder; @@ -4193,32 +4209,32 @@ int32_t setAlterTableInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { SAlterTableSQL* pAlterSQL = pInfo->pAlterInfo; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, DEFAULT_TABLE_INDEX); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, DEFAULT_TABLE_INDEX); if (tscValidateName(&(pAlterSQL->name)) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } - if (setMeterID(pMeterMetaInfo, &(pAlterSQL->name), pSql) != TSDB_CODE_SUCCESS) { + if (setMeterID(pTableMetaInfo, &(pAlterSQL->name), pSql) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, msg2); } - int32_t ret = tscGetMeterMeta(pSql, pMeterMetaInfo); + int32_t ret = tscGetTableMeta(pSql, pTableMetaInfo); if (ret != TSDB_CODE_SUCCESS) { return ret; } - STableMeta* pMeterMeta = pMeterMetaInfo->pMeterMeta; + STableMeta* pTableMeta = pTableMetaInfo->pTableMeta; if (pAlterSQL->type == TSDB_ALTER_TABLE_ADD_TAG_COLUMN || pAlterSQL->type == TSDB_ALTER_TABLE_DROP_TAG_COLUMN || pAlterSQL->type == TSDB_ALTER_TABLE_CHANGE_TAG_COLUMN) { - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { return invalidSqlErrMsg(pQueryInfo->msg, msg3); } - } else if ((pAlterSQL->type == TSDB_ALTER_TABLE_UPDATE_TAG_VAL) && (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo))) { + } else if ((pAlterSQL->type == TSDB_ALTER_TABLE_UPDATE_TAG_VAL) && (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo))) { return invalidSqlErrMsg(pQueryInfo->msg, msg4); } else if ((pAlterSQL->type 
== TSDB_ALTER_TABLE_ADD_COLUMN || pAlterSQL->type == TSDB_ALTER_TABLE_DROP_COLUMN) && - UTIL_METER_IS_CREATE_FROM_METRIC(pMeterMetaInfo)) { + UTIL_TABLE_CREATE_FROM_STABLE(pTableMetaInfo)) { return invalidSqlErrMsg(pQueryInfo->msg, msg6); } @@ -4240,7 +4256,7 @@ int32_t setAlterTableInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { const char* msg4 = "illegal tag name"; const char* msg5 = "primary tag cannot be dropped"; - if (pMeterMeta->numOfTags == 1) { + if (tscGetNumOfTags(pTableMeta) == 1) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } @@ -4261,7 +4277,7 @@ int32_t setAlterTableInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { return TSDB_CODE_INVALID_SQL; } - if (index.columnIndex < pMeterMeta->numOfColumns) { + if (index.columnIndex < tscGetNumOfColumns(pTableMeta)) { return invalidSqlErrMsg(pQueryInfo->msg, msg4); } else if (index.columnIndex == 0) { return invalidSqlErrMsg(pQueryInfo->msg, msg5); @@ -4327,11 +4343,11 @@ int32_t setAlterTableInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { return TSDB_CODE_INVALID_SQL; } - if (columnIndex.columnIndex < pMeterMeta->numOfColumns) { + if (columnIndex.columnIndex < tscGetNumOfColumns(pTableMeta)) { return invalidSqlErrMsg(pQueryInfo->msg, msg2); } - SSchema* pTagsSchema = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, columnIndex.columnIndex); + SSchema* pTagsSchema = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, columnIndex.columnIndex); if (tVariantDump(&pVarList->a[1].pVar, pAlterSQL->tagData.data /*pCmd->payload*/, pTagsSchema->type) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); @@ -4366,7 +4382,7 @@ int32_t setAlterTableInfo(SSqlObj* pSql, struct SSqlInfo* pInfo) { const char* msg4 = "illegal column name"; const char* msg3 = "primary timestamp column cannot be dropped"; - if (pMeterMeta->numOfColumns == TSDB_MIN_COLUMNS) { // + if (tscGetNumOfColumns(pTableMeta) == TSDB_MIN_COLUMNS) { // return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } @@ -4586,7 +4602,7 @@ bool 
hasTimestampForPointInterpQuery(SQueryInfo* pQueryInfo) { } int32_t parseLimitClause(SQueryInfo* pQueryInfo, int32_t clauseIndex, SQuerySQL* pQuerySql, SSqlObj* pSql) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); const char* msg0 = "soffset/offset can not be less than 0"; const char* msg1 = "slimit/soffset only available for STable query"; @@ -4611,7 +4627,7 @@ int32_t parseLimitClause(SQueryInfo* pQueryInfo, int32_t clauseIndex, SQuerySQL* return TSDB_CODE_SUCCESS; } - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { bool queryOnTags = false; if (tscQueryOnlyMetricTags(pQueryInfo, &queryOnTags) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; @@ -4650,8 +4666,8 @@ int32_t parseLimitClause(SQueryInfo* pQueryInfo, int32_t clauseIndex, SQuerySQL* } // No tables included. No results generated. Query results are empty. - SSuperTableMeta* pMetricMeta = pMeterMetaInfo->pMetricMeta; - if (pMeterMetaInfo->pMeterMeta == NULL || pMetricMeta == NULL || pMetricMeta->numOfTables == 0) { + SSuperTableMeta* pMetricMeta = pTableMetaInfo->pMetricMeta; + if (pTableMetaInfo->pTableMeta == NULL || pMetricMeta == NULL || pMetricMeta->numOfTables == 0) { tscTrace("%p no table in metricmeta, no output result", pSql); pQueryInfo->command = TSDB_SQL_RETRIEVE_EMPTY_RESULT; } @@ -4689,7 +4705,7 @@ int32_t parseLimitClause(SQueryInfo* pQueryInfo, int32_t clauseIndex, SQuerySQL* return TSDB_CODE_SUCCESS; } -static int32_t setKeepOption(SSqlCmd* pCmd, SCreateDbMsg* pMsg, SCreateDBInfo* pCreateDb) { +static int32_t setKeepOption(SSqlCmd* pCmd, SCMCreateDbMsg* pMsg, SCreateDBInfo* pCreateDb) { const char* msg = "invalid number of options"; pMsg->daysToKeep = htonl(-1); @@ -4720,7 +4736,7 @@ static int32_t setKeepOption(SSqlCmd* pCmd, SCreateDbMsg* pMsg, SCreateDBInfo* p return TSDB_CODE_SUCCESS; } -static int32_t 
setTimePrecisionOption(SSqlCmd* pCmd, SCreateDbMsg* pMsg, SCreateDBInfo* pCreateDbInfo) { +static int32_t setTimePrecisionOption(SSqlCmd* pCmd, SCMCreateDbMsg* pMsg, SCreateDBInfo* pCreateDbInfo) { const char* msg = "invalid time precision"; pMsg->precision = TSDB_TIME_PRECISION_MILLI; // millisecond by default @@ -4744,7 +4760,7 @@ static int32_t setTimePrecisionOption(SSqlCmd* pCmd, SCreateDbMsg* pMsg, SCreate return TSDB_CODE_SUCCESS; } -static void setCreateDBOption(SCreateDbMsg* pMsg, SCreateDBInfo* pCreateDb) { +static void setCreateDBOption(SCMCreateDbMsg* pMsg, SCreateDBInfo* pCreateDb) { pMsg->blocksPerTable = htons(pCreateDb->numOfBlocksPerTable); pMsg->compression = pCreateDb->compressionLevel; @@ -4759,7 +4775,7 @@ static void setCreateDBOption(SCreateDbMsg* pMsg, SCreateDBInfo* pCreateDb) { } int32_t parseCreateDBOptions(SSqlCmd* pCmd, SCreateDBInfo* pCreateDbSql) { - SCreateDbMsg* pMsg = (SCreateDbMsg*)(pCmd->payload); + SCMCreateDbMsg* pMsg = (SCMCreateDbMsg*)(pCmd->payload); setCreateDBOption(pMsg, pCreateDbSql); if (setKeepOption(pCmd, pMsg, pCreateDbSql) != TSDB_CODE_SUCCESS) { @@ -4807,10 +4823,10 @@ void addGroupInfoForSubquery(SSqlObj* pParentObj, SSqlObj* pSql, int32_t subClau SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, num - 1); if (pExpr->functionId != TSDB_FUNC_TAG) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); - int16_t columnInfo = tscGetJoinTagColIndexByUid(&pQueryInfo->tagCond, pMeterMetaInfo->pMeterMeta->uid); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); + int16_t columnInfo = tscGetJoinTagColIndexByUid(&pQueryInfo->tagCond, pTableMetaInfo->pTableMeta->uid); SColumnIndex index = {.tableIndex = 0, .columnIndex = columnInfo}; - SSchema* pSchema = tsGetTagSchema(pMeterMetaInfo->pMeterMeta); + SSchema* pSchema = tscGetTableTagSchema(pTableMetaInfo->pTableMeta); int16_t type = pSchema[index.columnIndex].type; int16_t bytes = pSchema[index.columnIndex].bytes; @@ 
-4844,9 +4860,9 @@ static void doLimitOutputNormalColOfGroupby(SSqlExpr* pExpr) { void doAddGroupColumnForSubquery(SQueryInfo* pQueryInfo, int32_t tagIndex) { int32_t index = pQueryInfo->groupbyExpr.columnInfo[tagIndex].colIdx; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - SSchema* pSchema = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, index); + SSchema* pSchema = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, index); SColumnIndex colIndex = {.tableIndex = 0, .columnIndex = index}; SSqlExpr* pExpr = tscSqlExprInsert(pQueryInfo, pQueryInfo->exprsInfo.numOfExprs, TSDB_FUNC_PRJ, &colIndex, @@ -4878,8 +4894,8 @@ static void doUpdateSqlFunctionForTagPrj(SQueryInfo* pQueryInfo) { } } - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); for (int32_t i = 0; i < pQueryInfo->exprsInfo.numOfExprs; ++i) { SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i); @@ -5054,9 +5070,9 @@ static int32_t checkUpdateTagPrjFunctions(SQueryInfo* pQueryInfo) { static int32_t doAddGroupbyColumnsOnDemand(SQueryInfo* pQueryInfo) { const char* msg2 = "interval not allowed in group by normal column"; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); int16_t bytes = 0; int16_t type = 0; char* name = NULL; @@ -5070,7 +5086,7 @@ static int32_t doAddGroupbyColumnsOnDemand(SQueryInfo* pQueryInfo) { bytes = TSDB_TABLE_NAME_LEN; name = TSQL_TBNAME_L; } else { - colIndex = (TSDB_COL_IS_TAG(pColIndex->flag)) ? 
pMeterMetaInfo->pMeterMeta->numOfColumns + pColIndex->colIdx + colIndex = (TSDB_COL_IS_TAG(pColIndex->flag)) ? tscGetNumOfColumns(pTableMetaInfo->pTableMeta) + pColIndex->colIdx : pColIndex->colIdx; type = pSchema[colIndex].type; @@ -5251,7 +5267,7 @@ int32_t doLocalQueryProcess(SQueryInfo* pQueryInfo, SQuerySQL* pQuerySql) { } // can only perform the parameters based on the macro definitation -int32_t tscCheckCreateDbParams(SSqlCmd* pCmd, SCreateDbMsg* pCreate) { +int32_t tscCheckCreateDbParams(SSqlCmd* pCmd, SCMCreateDbMsg* pCreate) { char msg[512] = {0}; if (pCreate->commitLog != -1 && (pCreate->commitLog < 0 || pCreate->commitLog > 1)) { @@ -5364,7 +5380,7 @@ int32_t doCheckForCreateTable(SSqlObj* pSql, int32_t subClauseIndex, SSqlInfo* p SSqlCmd* pCmd = &pSql->cmd; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, subClauseIndex); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); SCreateTableSQL* pCreateTable = pInfo->pCreateTableInfo; @@ -5380,7 +5396,7 @@ int32_t doCheckForCreateTable(SSqlObj* pSql, int32_t subClauseIndex, SSqlInfo* p return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } - if (setMeterID(pMeterMetaInfo, pzTableName, pSql) != TSDB_CODE_SUCCESS) { + if (setMeterID(pTableMetaInfo, pzTableName, pSql) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg2); } @@ -5419,13 +5435,13 @@ int32_t doCheckForCreateFromStable(SSqlObj* pSql, SSqlInfo* pInfo) { SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); // two table: the first one is for current table, and the secondary is for the super table. 
- tscAddEmptyMeterMetaInfo(pQueryInfo); + tscAddEmptyMetaInfo(pQueryInfo); assert(pQueryInfo->numOfTables == 2); const int32_t TABLE_INDEX = 0; const int32_t STABLE_INDEX = 1; - SMeterMetaInfo* pStableMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, STABLE_INDEX); + STableMetaInfo* pStableMeterMetaInfo = tscGetMetaInfo(pQueryInfo, STABLE_INDEX); // super table name, create table by using dst SSQLToken* pToken = &(pCreateTable->usingInfo.stableName); @@ -5442,17 +5458,17 @@ int32_t doCheckForCreateFromStable(SSqlObj* pSql, SSqlInfo* pInfo) { strncpy(pCreateTable->usingInfo.tagdata.name, pStableMeterMetaInfo->name, TSDB_TABLE_ID_LEN); tVariantList* pList = pInfo->pCreateTableInfo->usingInfo.pTagVals; - int32_t code = tscGetMeterMeta(pSql, pStableMeterMetaInfo); + int32_t code = tscGetTableMeta(pSql, pStableMeterMetaInfo); if (code != TSDB_CODE_SUCCESS) { return code; } - if (pStableMeterMetaInfo->pMeterMeta->numOfTags != pList->nExpr) { + if (tscGetNumOfTags(pStableMeterMetaInfo->pTableMeta) != pList->nExpr) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg5); } // too long tag values will return invalid sql, not be truncated automatically - SSchema* pTagSchema = tsGetTagSchema(pStableMeterMetaInfo->pMeterMeta); + SSchema* pTagSchema = tscGetTableTagSchema(pStableMeterMetaInfo->pTableMeta); char* tagVal = pCreateTable->usingInfo.tagdata.data; for (int32_t i = 0; i < pList->nExpr; ++i) { @@ -5475,7 +5491,7 @@ int32_t doCheckForCreateFromStable(SSqlObj* pSql, SSqlInfo* pInfo) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } - SMeterMetaInfo* pTableMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, TABLE_INDEX); + STableMetaInfo* pTableMeterMetaInfo = tscGetMetaInfo(pQueryInfo, TABLE_INDEX); int32_t ret = setMeterID(pTableMeterMetaInfo, &pInfo->pCreateTableInfo->name, pSql); if (ret != TSDB_CODE_SUCCESS) { return ret; @@ -5496,7 +5512,7 @@ int32_t doCheckForStream(SSqlObj* pSql, SSqlInfo* pInfo) { 
assert(pQueryInfo->numOfTables == 1); SCreateTableSQL* pCreateTable = pInfo->pCreateTableInfo; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); // if sql specifies db, use it, otherwise use default db SSQLToken* pzTableName = &(pCreateTable->name); @@ -5514,16 +5530,16 @@ int32_t doCheckForStream(SSqlObj* pSql, SSqlInfo* pInfo) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } - if (setMeterID(pMeterMetaInfo, &srcToken, pSql) != TSDB_CODE_SUCCESS) { + if (setMeterID(pTableMetaInfo, &srcToken, pSql) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, msg2); } - int32_t code = tscGetMeterMeta(pSql, pMeterMetaInfo); + int32_t code = tscGetTableMeta(pSql, pTableMetaInfo); if (code != TSDB_CODE_SUCCESS) { return code; } - bool isSTable = UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo); + bool isSTable = UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo); if (parseSelectClause(&pSql->cmd, 0, pQuerySql->pSelection, isSTable) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; } @@ -5545,7 +5561,7 @@ int32_t doCheckForStream(SSqlObj* pSql, SSqlInfo* pInfo) { } // set the created table[stream] name - if (setMeterID(pMeterMetaInfo, pzTableName, pSql) != TSDB_CODE_SUCCESS) { + if (setMeterID(pTableMetaInfo, pzTableName, pSql) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(pQueryInfo->msg, msg1); } @@ -5603,9 +5619,9 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { SSqlCmd* pCmd = &pSql->cmd; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, index); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - if (pMeterMetaInfo == NULL) { - pMeterMetaInfo = tscAddEmptyMeterMetaInfo(pQueryInfo); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + if (pTableMetaInfo == NULL) { + pTableMetaInfo = tscAddEmptyMetaInfo(pQueryInfo); } // too many result columns not support order by in query @@ -5648,17 
+5664,17 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { } if (pQueryInfo->numOfTables <= i) { // more than one table - tscAddEmptyMeterMetaInfo(pQueryInfo); + tscAddEmptyMetaInfo(pQueryInfo); } - SMeterMetaInfo* pMeterInfo1 = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); + STableMetaInfo* pMeterInfo1 = tscGetMetaInfo(pQueryInfo, i); SSQLToken t = {.type = TSDB_DATA_TYPE_BINARY, .n = pTableItem->nLen, .z = pTableItem->pz}; if (setMeterID(pMeterInfo1, &t, pSql) != TSDB_CODE_SUCCESS) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg1); } - code = tscGetMeterMeta(pSql, pMeterInfo1); + code = tscGetTableMeta(pSql, pMeterInfo1); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -5671,7 +5687,7 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { return TSDB_CODE_INVALID_SQL; } - bool isSTable = UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo); + bool isSTable = UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo); if (parseSelectClause(pCmd, index, pQuerySql->pSelection, isSTable) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; } @@ -5687,19 +5703,20 @@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { } // set order by info - if (parseOrderbyClause(pQueryInfo, pQuerySql, tsGetSchema(pMeterMetaInfo->pMeterMeta)) != TSDB_CODE_SUCCESS) { + if (parseOrderbyClause(pQueryInfo, pQuerySql, tscGetTableSchema(pTableMetaInfo->pTableMeta)) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; } // set where info + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + if (pQuerySql->pWhere != NULL) { if (parseWhereClause(pQueryInfo, &pQuerySql->pWhere, pSql) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; } pQuerySql->pWhere = NULL; - - if (pMeterMetaInfo->pMeterMeta->precision == TSDB_TIME_PRECISION_MILLI) { + if (tinfo.precision == TSDB_TIME_PRECISION_MILLI) { pQueryInfo->stime = pQueryInfo->stime / 1000; pQueryInfo->etime = pQueryInfo->etime / 1000; } @@ -5710,8 +5727,7 
@@ int32_t doCheckForQuery(SSqlObj* pSql, SQuerySQL* pQuerySql, int32_t index) { // user does not specified the query time window, twa is not allowed in such case. if ((pQueryInfo->stime == 0 || pQueryInfo->etime == INT64_MAX || - (pQueryInfo->etime == INT64_MAX / 1000 && pMeterMetaInfo->pMeterMeta->precision == TSDB_TIME_PRECISION_MILLI)) && - tscIsTWAQuery(pQueryInfo)) { + (pQueryInfo->etime == INT64_MAX / 1000 && tinfo.precision == TSDB_TIME_PRECISION_MILLI)) && tscIsTWAQuery(pQueryInfo)) { return invalidSqlErrMsg(tscGetErrorMsgPayload(pCmd), msg9); } diff --git a/src/client/src/tscSchemaUtil.c b/src/client/src/tscSchemaUtil.c index d412ac18c86988665a2f5a1e1dad688d4bc45700..be0065ff4a0773c009513beb5bddefe80b67c3e1 100644 --- a/src/client/src/tscSchemaUtil.c +++ b/src/client/src/tscSchemaUtil.c @@ -19,6 +19,68 @@ #include "ttokendef.h" #include "taosdef.h" #include "tutil.h" +#include "tsclient.h" + +int32_t tscGetNumOfTags(const STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + + STableComInfo tinfo = tscGetTableInfo(pTableMeta); + + if (pTableMeta->tableType == TSDB_NORMAL_TABLE) { + assert(tinfo.numOfTags == 0); + return 0; + } + + if (pTableMeta->tableType == TSDB_SUPER_TABLE || pTableMeta->tableType == TSDB_CHILD_TABLE) { + assert(tinfo.numOfTags > 0); + return tinfo.numOfTags; + } + + assert(tinfo.numOfTags == 0); + return 0; +} + +int32_t tscGetNumOfColumns(const STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + + // table created according to super table, use data from super table + STableComInfo tinfo = tscGetTableInfo(pTableMeta); + return tinfo.numOfColumns; +} + +SSchema *tscGetTableSchema(const STableMeta *pTableMeta) { + assert(pTableMeta != NULL); + + if (pTableMeta->tableType == TSDB_CHILD_TABLE) { + STableMeta* pSTableMeta = pTableMeta->pSTable; + assert (pSTableMeta != NULL); + + return pSTableMeta->schema; + } + + return pTableMeta->schema; +} + +SSchema* tscGetTableTagSchema(const STableMeta* pTableMeta) { + assert(pTableMeta 
!= NULL && (pTableMeta->tableType == TSDB_SUPER_TABLE || pTableMeta->tableType == TSDB_CHILD_TABLE)); + + STableComInfo tinfo = tscGetTableInfo(pTableMeta); + assert(tinfo.numOfTags > 0); + + return tscGetTableColumnSchema(pTableMeta, tinfo.numOfColumns); +} + +STableComInfo tscGetTableInfo(const STableMeta* pTableMeta) { + assert(pTableMeta != NULL); + + if (pTableMeta->tableType == TSDB_CHILD_TABLE) { + assert (pTableMeta->pSTable != NULL); + return pTableMeta->pSTable->tableInfo; + } + + return pTableMeta->tableInfo; +} + bool isValidSchema(struct SSchema* pSchema, int32_t numOfCols) { if (!VALIDNUMOFCOLS(numOfCols)) { @@ -64,34 +126,66 @@ bool isValidSchema(struct SSchema* pSchema, int32_t numOfCols) { return (rowLen <= TSDB_MAX_BYTES_PER_ROW); } -struct SSchema* tsGetSchema(STableMeta* pMeta) { - if (pMeta == NULL) { - return NULL; - } - return tsGetColumnSchema(pMeta, 0); -} - -struct SSchema* tsGetTagSchema(STableMeta* pMeta) { - if (pMeta == NULL || pMeta->numOfTags == 0) { - return NULL; +SSchema* tscGetTableColumnSchema(const STableMeta* pTableMeta, int32_t startCol) { + assert(pTableMeta != NULL); + + SSchema* pSchema = pTableMeta->schema; + + if (pTableMeta->tableType == TSDB_CHILD_TABLE) { + assert (pTableMeta->pSTable != NULL); + pSchema = pTableMeta->pSTable->schema; } - - return tsGetColumnSchema(pMeta, pMeta->numOfColumns); + + return &pSchema[startCol]; } -struct SSchema* tsGetColumnSchema(STableMeta* pMeta, int32_t startCol) { - return (SSchema*)(((char*)pMeta + sizeof(STableMeta)) + startCol * sizeof(SSchema)); +struct SSchema tscGetTbnameColumnSchema() { + struct SSchema s = { + .colId = TSDB_TBNAME_COLUMN_INDEX, + .type = TSDB_DATA_TYPE_BINARY, + .bytes = TSDB_TABLE_NAME_LEN + }; + + strcpy(s.name, TSQL_TBNAME_L); + return s; } -struct SSchema tsGetTbnameColumnSchema() { - struct SSchema s = {.colId = TSDB_TBNAME_COLUMN_INDEX, .type = TSDB_DATA_TYPE_BINARY, .bytes = TSDB_TABLE_NAME_LEN}; - strcpy(s.name, TSQL_TBNAME_L); +STableMeta* 
tscCreateTableMetaFromMsg(STableMetaMsg* pTableMetaMsg, size_t* size) { + assert(pTableMetaMsg != NULL); - return s; + int32_t schemaSize = (pTableMetaMsg->numOfColumns + pTableMetaMsg->numOfTags) * sizeof(SSchema); + STableMeta* pTableMeta = calloc(1, sizeof(STableMeta) + schemaSize); + pTableMeta->tableType = pTableMetaMsg->tableType; + + pTableMeta->tableInfo = (STableComInfo) { + .numOfTags = pTableMetaMsg->numOfTags, + .numOfColumns = pTableMetaMsg->numOfColumns, + .precision = pTableMetaMsg->precision + }; + + pTableMeta->sid = pTableMetaMsg->sid; + pTableMeta->uid = pTableMetaMsg->uid; + pTableMeta->vgId = pTableMetaMsg->vgId; + + pTableMeta->numOfVpeers = pTableMetaMsg->numOfVpeers; + memcpy(pTableMeta->vpeerDesc, pTableMetaMsg->vpeerDesc, sizeof(SVnodeDesc) * pTableMeta->numOfVpeers); + memcpy(pTableMeta->schema, pTableMetaMsg->schema, schemaSize); + + int32_t numOfTotalCols = pTableMeta->tableInfo.numOfColumns + pTableMeta->tableInfo.numOfTags; + for(int32_t i = 0; i < numOfTotalCols; ++i) { + pTableMeta->tableInfo.rowSize += pTableMeta->schema[i].bytes; + } + + if (size != NULL) { + *size = sizeof(STableMeta) + schemaSize; + } + + return pTableMeta; } + /** - * the MeterMeta data format in memory is as follows: + * the TableMeta data format in memory is as follows: * * +--------------------+ * |STableMeta Body data| sizeof(STableMeta) @@ -101,33 +195,15 @@ struct SSchema tsGetTbnameColumnSchema() { * |Tags data | tag_col_1.bytes + tag_col_2.bytes + .... 
* +--------------------+ * - * @param pMeta + * @param pTableMeta * @return */ -char* tsGetTagsValue(STableMeta* pMeta) { - int32_t numOfTotalCols = pMeta->numOfColumns + pMeta->numOfTags; - uint32_t offset = sizeof(STableMeta) + numOfTotalCols * sizeof(SSchema); - - return ((char*)pMeta + offset); -} +char* tsGetTagsValue(STableMeta* pTableMeta) { + int32_t offset = 0; +// int32_t numOfTotalCols = pTableMeta->numOfColumns + pTableMeta->numOfTags; +// uint32_t offset = sizeof(STableMeta) + numOfTotalCols * sizeof(SSchema); -bool tsMeterMetaIdentical(STableMeta* p1, STableMeta* p2) { - if (p1 == NULL || p2 == NULL || p1->uid != p2->uid || p1->sversion != p2->sversion) { - return false; - } - - if (p1 == p2) { - return true; - } - - size_t size = sizeof(STableMeta) + p1->numOfColumns * sizeof(SSchema); - - for (int32_t i = 0; i < p1->numOfTags; ++i) { - SSchema* pColSchema = tsGetColumnSchema(p1, i + p1->numOfColumns); - size += pColSchema->bytes; - } - - return memcmp(p1, p2, size) == 0; + return ((char*)pTableMeta + offset); } // todo refactor @@ -149,24 +225,6 @@ static FORCE_INLINE size_t copy(char* dst, const char* src, char delimiter) { return len; } -/** - * extract table name from meterid, which the format of userid.dbname.metername - * @param tableId - * @return - */ -void extractTableName(char* tableId, char* name) { - char* r = skipSegments(tableId, TS_PATH_DELIMITER[0], 2); - copy(name, r, TS_PATH_DELIMITER[0]); -} - -SSQLToken extractDBName(char* tableId, char* name) { - char* r = skipSegments(tableId, TS_PATH_DELIMITER[0], 1); - size_t len = copy(name, r, TS_PATH_DELIMITER[0]); - - SSQLToken token = {.z = name, .n = len, .type = TK_STRING}; - return token; -} - /* * tablePrefix.columnName * extract table name and save it in pTable, with only column name in pToken diff --git a/src/client/src/tscSecondaryMerge.c b/src/client/src/tscSecondaryMerge.c index 51a59005f001f7555a3ffc2e5c7e15ed111af988..0ee8afd53c59969f11bfed85c1c32804e48050f9 100644 --- 
a/src/client/src/tscSecondaryMerge.c +++ b/src/client/src/tscSecondaryMerge.c @@ -319,12 +319,13 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd pRes->pLocalReducer = pReducer; pRes->numOfGroups = 0; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - int16_t prec = pMeterMetaInfo->pMeterMeta->precision; - + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + + int16_t prec = tinfo.precision; int64_t stime = (pQueryInfo->stime < pQueryInfo->etime) ? pQueryInfo->stime : pQueryInfo->etime; int64_t revisedSTime = - taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->intervalTimeUnit, prec); + taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, prec); SInterpolationInfo *pInterpoInfo = &pReducer->interpolationInfo; taosInitInterpoInfo(pInterpoInfo, pQueryInfo->order.order, revisedSTime, pQueryInfo->groupbyExpr.numOfGroupCols, @@ -602,9 +603,9 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr *pFinalModel = NULL; SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - (*pMemBuffer) = (tExtMemBuffer **)malloc(POINTER_BYTES * pMeterMetaInfo->pMetricMeta->numOfVnodes); + (*pMemBuffer) = (tExtMemBuffer **)malloc(POINTER_BYTES * pTableMetaInfo->pMetricMeta->numOfVnodes); if (*pMemBuffer == NULL) { tscError("%p failed to allocate memory", pSql); pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; @@ -635,7 +636,7 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr pModel = createColumnModel(pSchema, pQueryInfo->exprsInfo.numOfExprs, capacity); - for (int32_t i = 0; i < pMeterMetaInfo->pMetricMeta->numOfVnodes; ++i) { 
+ for (int32_t i = 0; i < pTableMetaInfo->pMetricMeta->numOfVnodes; ++i) { (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel); (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL; } @@ -650,7 +651,7 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr for (int32_t i = 0; i < pQueryInfo->exprsInfo.numOfExprs; ++i) { SSqlExpr *pExpr = tscSqlExprGet(pQueryInfo, i); - SSchema *p1 = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, pExpr->colInfo.colIdx); + SSchema *p1 = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pExpr->colInfo.colIdx); int16_t inter = 0; int16_t type = -1; @@ -774,12 +775,14 @@ void adjustLoserTreeFromNewData(SLocalReducer *pLocalReducer, SLocalDataSource * void savePrevRecordAndSetupInterpoInfo(SLocalReducer *pLocalReducer, SQueryInfo *pQueryInfo, SInterpolationInfo *pInterpoInfo) { // discard following dataset in the same group and reset the interpolation information - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - int16_t prec = pMeterMetaInfo->pMeterMeta->precision; - + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + + int16_t prec = tinfo.precision; int64_t stime = (pQueryInfo->stime < pQueryInfo->etime) ? 
pQueryInfo->stime : pQueryInfo->etime; int64_t revisedSTime = - taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->intervalTimeUnit, prec); + taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, prec); taosInitInterpoInfo(pInterpoInfo, pQueryInfo->order.order, revisedSTime, pQueryInfo->groupbyExpr.numOfGroupCols, pLocalReducer->rowSize); @@ -917,13 +920,15 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo functions[i] = tscSqlExprGet(pQueryInfo, i)->functionId; } - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - int8_t precision = pMeterMetaInfo->pMeterMeta->precision; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + + int8_t precision = tinfo.precision; while (1) { int32_t remains = taosNumOfRemainPoints(pInterpoInfo); TSKEY etime = taosGetRevisedEndKey(actualETime, pQueryInfo->order.order, pQueryInfo->intervalTime, - pQueryInfo->intervalTimeUnit, precision); + pQueryInfo->slidingTimeUnit, precision); int32_t nrows = taosGetNumOfResultWithInterpo(pInterpoInfo, pPrimaryKeys, remains, pQueryInfo->intervalTime, etime, pLocalReducer->resColModel->capacity); @@ -1268,14 +1273,16 @@ static void resetEnvForNewResultset(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer pQueryInfo->limit.offset = pLocalReducer->offset; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - int16_t precision = pMeterMetaInfo->pMeterMeta->precision; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + + int8_t precision = tinfo.precision; // for group result interpolation, do not return if not data is generated if (pQueryInfo->interpoType != TSDB_INTERPO_NONE) { int64_t stime = (pQueryInfo->stime < pQueryInfo->etime) ? 
pQueryInfo->stime : pQueryInfo->etime; int64_t newTime = - taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->intervalTimeUnit, precision); + taosGetIntervalStartTimestamp(stime, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, precision); taosInitInterpoInfo(&pLocalReducer->interpolationInfo, pQueryInfo->order.order, newTime, pQueryInfo->groupbyExpr.numOfGroupCols, pLocalReducer->rowSize); @@ -1294,8 +1301,10 @@ static bool doInterpolationForCurrentGroup(SSqlObj *pSql) { SLocalReducer * pLocalReducer = pRes->pLocalReducer; SInterpolationInfo *pInterpoInfo = &pLocalReducer->interpolationInfo; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - int8_t p = pMeterMetaInfo->pMeterMeta->precision; + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + + int8_t p = tinfo.precision; if (taosHasRemainsDataForInterpolation(pInterpoInfo)) { assert(pQueryInfo->interpoType != TSDB_INTERPO_NONE); @@ -1305,7 +1314,7 @@ static bool doInterpolationForCurrentGroup(SSqlObj *pSql) { int32_t remain = taosNumOfRemainPoints(pInterpoInfo); TSKEY ekey = - taosGetRevisedEndKey(etime, pQueryInfo->order.order, pQueryInfo->intervalTime, pQueryInfo->intervalTimeUnit, p); + taosGetRevisedEndKey(etime, pQueryInfo->order.order, pQueryInfo->intervalTime, pQueryInfo->slidingTimeUnit, p); int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY *)pLocalReducer->pBufForInterpo, remain, pQueryInfo->intervalTime, ekey, pLocalReducer->resColModel->capacity); if (rows > 0) { // do interpo @@ -1328,8 +1337,10 @@ static bool doHandleLastRemainData(SSqlObj *pSql) { bool prevGroupCompleted = (!pLocalReducer->discard) && pLocalReducer->hasUnprocessedRow; SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - int8_t precision = 
pMeterMetaInfo->pMeterMeta->precision; + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + int8_t precision = tinfo.precision; if ((isAllSourcesCompleted(pLocalReducer) && !pLocalReducer->hasPrevRow) || pLocalReducer->pLocalDataSrc[0] == NULL || prevGroupCompleted) { @@ -1338,7 +1349,7 @@ static bool doHandleLastRemainData(SSqlObj *pSql) { int64_t etime = (pQueryInfo->stime < pQueryInfo->etime) ? pQueryInfo->etime : pQueryInfo->stime; etime = taosGetRevisedEndKey(etime, pQueryInfo->order.order, pQueryInfo->intervalTime, - pQueryInfo->intervalTimeUnit, precision); + pQueryInfo->slidingTimeUnit, precision); int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, NULL, 0, pQueryInfo->intervalTime, etime, pLocalReducer->resColModel->capacity); if (rows > 0) { // do interpo diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index 68faf08dd6a20c7091d53e429c00389e95afffb3..8fb3df94d5844734ec07ee83e50d193fc0e7c7fa 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -16,14 +16,12 @@ #include "os.h" #include "tcache.h" #include "trpc.h" -#include "tscJoinProcess.h" #include "tscProfile.h" -#include "tscSQLParser.h" #include "tscSecondaryMerge.h" +#include "tscSubquery.h" #include "tscUtil.h" #include "tschemautil.h" #include "tsclient.h" -#include "tscompression.h" #include "tsocket.h" #include "ttime.h" #include "ttimer.h" @@ -31,9 +29,6 @@ #define TSC_MGMT_VNODE 999 -int tsMasterIndex = 0; -int tsSlaveIndex = 1; - SRpcIpSet tscMgmtIpList; SRpcIpSet tscDnodeIpSet; @@ -72,7 +67,7 @@ void tscSetMgmtIpListFromEdge() { if (tscMgmtIpList.numOfIps != 1) { tscMgmtIpList.numOfIps = 1; tscMgmtIpList.inUse = 0; - tscMgmtIpList.port = tsMgmtShellPort; + tscMgmtIpList.port = tsMnodeShellPort; tscMgmtIpList.ip[0] = inet_addr(tsMasterIp); tscTrace("edge mgmt IP list:"); tscPrintMgmtIp(); @@ -116,7 +111,7 @@ void tscProcessHeartBeatRsp(void *param, TAOS_RES 
*tres, int code) { SSqlRes *pRes = &pSql->res; if (code == 0) { - SHeartBeatRsp *pRsp = (SHeartBeatRsp *)pRes->pRsp; + SCMHeartBeatRsp *pRsp = (SCMHeartBeatRsp *)pRes->pRsp; SRpcIpSet * pIpList = &pRsp->ipList; tscSetMgmtIpList(pIpList); @@ -181,11 +176,10 @@ int tscSendMsgToServer(SSqlObj *pSql) { return TSDB_CODE_CLI_OUT_OF_MEMORY; } - pSql->ipList->ip[0] = inet_addr("192.168.0.1"); - SSqlCmd* pCmd = &pSql->cmd; + pSql->ipList->ip[0] = inet_addr(tsPrivateIp); if (pSql->cmd.command < TSDB_SQL_MGMT) { - pSql->ipList->port = tsVnodeShellPort; + pSql->ipList->port = tsDnodeShellPort; tscPrint("%p msg:%s is sent to server %d", pSql, taosMsg[pSql->cmd.msgType], pSql->ipList->port); memcpy(pMsg, pSql->cmd.payload + tsRpcHeadSize, pSql->cmd.payloadLen); @@ -198,8 +192,8 @@ int tscSendMsgToServer(SSqlObj *pSql) { }; rpcSendRequest(pVnodeConn, pSql->ipList, &rpcMsg); } else { - pSql->ipList->port = tsMgmtShellPort; - tscPrint("%p msg:%s is sent to server %d", pSql, taosMsg[pSql->cmd.msgType], pSql->ipList->port); + pSql->ipList->port = tsMnodeShellPort; + tscTrace("%p msg:%s is sent to server %d", pSql, taosMsg[pSql->cmd.msgType], pSql->ipList->port); memcpy(pMsg, pSql->cmd.payload, pSql->cmd.payloadLen); SRpcMsg rpcMsg = { .msgType = pSql->cmd.msgType, @@ -215,7 +209,7 @@ int tscSendMsgToServer(SSqlObj *pSql) { } void tscProcessMsgFromServer(SRpcMsg *rpcMsg) { - tscPrint("response:%s is received, len:%d error:%s", taosMsg[rpcMsg->msgType], rpcMsg->contLen, tstrerror(rpcMsg->code)); + tscTrace("response:%s is received, len:%d error:%s", taosMsg[rpcMsg->msgType], rpcMsg->contLen, tstrerror(rpcMsg->code)); SSqlObj *pSql = (SSqlObj *)rpcMsg->handle; if (pSql == NULL || pSql->signature != pSql) { tscError("%p sql is already released, signature:%p", pSql, pSql->signature); @@ -238,7 +232,7 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg) { if (rpcMsg->pCont == NULL) { rpcMsg->code = TSDB_CODE_NETWORK_UNAVAIL; } else { - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, 
pCmd->clauseIndex, 0); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); if (rpcMsg->code == TSDB_CODE_NOT_ACTIVE_TABLE || rpcMsg->code == TSDB_CODE_INVALID_TABLE_ID || rpcMsg->code == TSDB_CODE_INVALID_VNODE_ID || rpcMsg->code == TSDB_CODE_NOT_ACTIVE_VNODE || rpcMsg->code == TSDB_CODE_NETWORK_UNAVAIL || rpcMsg->code == TSDB_CODE_NOT_ACTIVE_SESSION || @@ -267,9 +261,9 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg) { pSql->maxRetry = TSDB_VNODES_SUPPORT * 2; pSql->res.code = rpcMsg->code; // keep the previous error code - rpcMsg->code = tscRenewMeterMeta(pSql, pMeterMetaInfo->name); + rpcMsg->code = tscRenewMeterMeta(pSql, pTableMetaInfo->name); - if (pMeterMetaInfo->pMeterMeta) { + if (pTableMetaInfo->pTableMeta) { tscSendMsgToServer(pSql); rpcFreeCont(rpcMsg->pCont); return; @@ -280,8 +274,6 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg) { pSql->retry = 0; - if (pSql->fp == NULL) tsem_wait(&pSql->emptyRspSem); - pRes->rspLen = 0; if (pRes->code != TSDB_CODE_QUERY_CANCELLED) { pRes->code = (rpcMsg->code != TSDB_CODE_SUCCESS) ? 
rpcMsg->code : TSDB_CODE_NETWORK_UNAVAIL; @@ -306,7 +298,7 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg) { } // ignore the error information returned from mnode when set ignore flag in sql - if (pRes->code == TSDB_CODE_DB_ALREADY_EXIST && pCmd->existsCheck && pRes->rspType == TSDB_MSG_TYPE_CREATE_DB_RSP) { + if (pRes->code == TSDB_CODE_DB_ALREADY_EXIST && pCmd->existsCheck && pRes->rspType == TSDB_MSG_TYPE_CM_CREATE_DB_RSP) { pRes->code = TSDB_CODE_SUCCESS; } @@ -330,43 +322,35 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg) { } } - if (pSql->fp == NULL) { - tsem_post(&pSql->rspSem); - } else { - if (pRes->code == TSDB_CODE_SUCCESS && tscProcessMsgRsp[pCmd->command]) - rpcMsg->code = (*tscProcessMsgRsp[pCmd->command])(pSql); + if (pRes->code == TSDB_CODE_SUCCESS && tscProcessMsgRsp[pCmd->command]) + rpcMsg->code = (*tscProcessMsgRsp[pCmd->command])(pSql); - if (rpcMsg->code != TSDB_CODE_ACTION_IN_PROGRESS) { - int command = pCmd->command; - void *taosres = tscKeepConn[command] ? pSql : NULL; - rpcMsg->code = pRes->code ? -pRes->code : pRes->numOfRows; + if (rpcMsg->code != TSDB_CODE_ACTION_IN_PROGRESS) { + int command = pCmd->command; + void *taosres = tscKeepConn[command] ? pSql : NULL; + rpcMsg->code = pRes->code ? -pRes->code : pRes->numOfRows; - tscTrace("%p Async SQL result:%d res:%p", pSql, rpcMsg->code, taosres); + tscTrace("%p Async SQL result:%d res:%p", pSql, rpcMsg->code, taosres); - /* - * Whether to free sqlObj or not should be decided before call the user defined function, since this SqlObj - * may be freed in UDF, and reused by other threads before tscShouldFreeAsyncSqlObj called, in which case - * tscShouldFreeAsyncSqlObj checks an object which is actually allocated by other threads. - * - * If this block of memory is re-allocated for an insert thread, in which tscKeepConn[command] equals to 0, - * the tscShouldFreeAsyncSqlObj will success and tscFreeSqlObj free it immediately. 
- */ - bool shouldFree = tscShouldFreeAsyncSqlObj(pSql); - if (command == TSDB_SQL_INSERT) { // handle multi-vnode insertion situation - (*pSql->fp)(pSql, taosres, rpcMsg->code); + /* + * Whether to free sqlObj or not should be decided before call the user defined function, since this SqlObj + * may be freed in UDF, and reused by other threads before tscShouldFreeAsyncSqlObj called, in which case + * tscShouldFreeAsyncSqlObj checks an object which is actually allocated by other threads. + * + * If this block of memory is re-allocated for an insert thread, in which tscKeepConn[command] equals to 0, + * the tscShouldFreeAsyncSqlObj will success and tscFreeSqlObj free it immediately. + */ + bool shouldFree = tscShouldFreeAsyncSqlObj(pSql); + (*pSql->fp)(pSql->param, taosres, rpcMsg->code); + + if (shouldFree) { + // If it is failed, all objects allocated during execution taos_connect_a should be released + if (command == TSDB_SQL_CONNECT) { + taos_close(pObj); + tscTrace("%p Async sql close failed connection", pSql); } else { - (*pSql->fp)(pSql->param, taosres, rpcMsg->code); - } - - if (shouldFree) { - // If it is failed, all objects allocated during execution taos_connect_a should be released - if (command == TSDB_SQL_CONNECT) { - taos_close(pObj); - tscTrace("%p Async sql close failed connection", pSql); - } else { - tscFreeSqlObj(pSql); - tscTrace("%p Async sql is automatically freed", pSql); - } + tscFreeSqlObj(pSql); + tscTrace("%p Async sql is automatically freed", pSql); } } } @@ -374,143 +358,28 @@ void tscProcessMsgFromServer(SRpcMsg *rpcMsg) { rpcFreeCont(rpcMsg->pCont); } -static SSqlObj *tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj *prevSqlObj); -static int tscLaunchSTableSubqueries(SSqlObj *pSql); - -// todo merge with callback -int32_t tscLaunchJoinSubquery(SSqlObj *pSql, int16_t tableIndex, SJoinSubquerySupporter *pSupporter) { - SSqlCmd * pCmd = &pSql->cmd; - SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, 
pCmd->clauseIndex); - - pSql->res.qhandle = 0x1; - pSql->res.numOfRows = 0; - - if (pSql->pSubs == NULL) { - pSql->pSubs = calloc(pSupporter->pState->numOfTotal, POINTER_BYTES); - if (pSql->pSubs == NULL) { - return TSDB_CODE_CLI_OUT_OF_MEMORY; - } - } - - SSqlObj *pNew = createSubqueryObj(pSql, tableIndex, tscJoinQueryCallback, pSupporter, NULL); - if (pNew == NULL) { - return TSDB_CODE_CLI_OUT_OF_MEMORY; - } - - pSql->pSubs[pSql->numOfSubs++] = pNew; - assert(pSql->numOfSubs <= pSupporter->pState->numOfTotal); - - if (QUERY_IS_JOIN_QUERY(pQueryInfo->type)) { - addGroupInfoForSubquery(pSql, pNew, 0, tableIndex); - - // refactor as one method - SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); - assert(pNewQueryInfo != NULL); - - tscColumnBaseInfoUpdateTableIndex(&pNewQueryInfo->colList, 0); - tscColumnBaseInfoCopy(&pSupporter->colList, &pNewQueryInfo->colList, 0); - - tscSqlExprCopy(&pSupporter->exprsInfo, &pNewQueryInfo->exprsInfo, pSupporter->uid, false); - tscFieldInfoCopyAll(&pSupporter->fieldsInfo, &pNewQueryInfo->fieldsInfo); - - tscTagCondCopy(&pSupporter->tagCond, &pNewQueryInfo->tagCond); - - pNew->cmd.numOfCols = 0; - pNewQueryInfo->intervalTime = 0; - memset(&pNewQueryInfo->limit, 0, sizeof(SLimitVal)); - - // backup the data and clear it in the sqlcmd object - pSupporter->groupbyExpr = pNewQueryInfo->groupbyExpr; - memset(&pNewQueryInfo->groupbyExpr, 0, sizeof(SSqlGroupbyExpr)); - - // this data needs to be transfer to support struct - pNewQueryInfo->fieldsInfo.numOfOutputCols = 0; - pNewQueryInfo->exprsInfo.numOfExprs = 0; - - // set the ts,tags that involved in join, as the output column of intermediate result - tscClearSubqueryInfo(&pNew->cmd); - - SSchema colSchema = {.type = TSDB_DATA_TYPE_BINARY, .bytes = 1}; - SColumnIndex index = {0, PRIMARYKEY_TIMESTAMP_COL_INDEX}; - - tscAddSpecialColumnForSelect(pNewQueryInfo, 0, TSDB_FUNC_TS_COMP, &index, &colSchema, TSDB_COL_NORMAL); - - // set the tags value for ts_comp function - SSqlExpr 
*pExpr = tscSqlExprGet(pNewQueryInfo, 0); - - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pNewQueryInfo, 0); - int16_t tagColIndex = tscGetJoinTagColIndexByUid(&pSupporter->tagCond, pMeterMetaInfo->pMeterMeta->uid); - - pExpr->param->i64Key = tagColIndex; - pExpr->numOfParams = 1; - - // add the filter tag column - for (int32_t i = 0; i < pSupporter->colList.numOfCols; ++i) { - SColumnBase *pColBase = &pSupporter->colList.pColList[i]; - if (pColBase->numOfFilters > 0) { // copy to the pNew->cmd.colList if it is filtered. - tscColumnBaseCopy(&pNewQueryInfo->colList.pColList[pNewQueryInfo->colList.numOfCols], pColBase); - pNewQueryInfo->colList.numOfCols++; - } - } - - tscTrace("%p subquery:%p tableIndex:%d, vnodeIdx:%d, type:%d, transfer to ts_comp query to retrieve timestamps, " - "exprInfo:%d, colList:%d, fieldsInfo:%d, name:%s", - pSql, pNew, tableIndex, pMeterMetaInfo->vnodeIndex, pNewQueryInfo->type, - pNewQueryInfo->exprsInfo.numOfExprs, pNewQueryInfo->colList.numOfCols, - pNewQueryInfo->fieldsInfo.numOfOutputCols, pNewQueryInfo->pMeterInfo[0]->name); - tscPrintSelectClause(pNew, 0); - - tscTrace("%p subquery:%p tableIndex:%d, vnodeIdx:%d, type:%d, transfer to ts_comp query to retrieve timestamps, " - "exprInfo:%d, colList:%d, fieldsInfo:%d, name:%s", - pSql, pNew, tableIndex, pMeterMetaInfo->vnodeIndex, pNewQueryInfo->type, - pNewQueryInfo->exprsInfo.numOfExprs, pNewQueryInfo->colList.numOfCols, - pNewQueryInfo->fieldsInfo.numOfOutputCols, pNewQueryInfo->pMeterInfo[0]->name); - tscPrintSelectClause(pNew, 0); - } else { - SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); - pNewQueryInfo->type |= TSDB_QUERY_TYPE_SUBQUERY; - } - -#ifdef _DEBUG_VIEW - tscPrintSelectClause(pNew, 0); -#endif - - return tscProcessSql(pNew); -} - int doProcessSql(SSqlObj *pSql) { SSqlCmd *pCmd = &pSql->cmd; SSqlRes *pRes = &pSql->res; - void *asyncFp = pSql->fp; - if (pCmd->command == TSDB_SQL_SELECT || pCmd->command == TSDB_SQL_FETCH || 
pCmd->command == TSDB_SQL_RETRIEVE || - pCmd->command == TSDB_SQL_INSERT || pCmd->command == TSDB_SQL_CONNECT || pCmd->command == TSDB_SQL_HB || - pCmd->command == TSDB_SQL_META || pCmd->command == TSDB_SQL_METRIC) { + if (pCmd->command == TSDB_SQL_SELECT || + pCmd->command == TSDB_SQL_FETCH || + pCmd->command == TSDB_SQL_RETRIEVE || + pCmd->command == TSDB_SQL_INSERT || + pCmd->command == TSDB_SQL_CONNECT || + pCmd->command == TSDB_SQL_HB || + pCmd->command == TSDB_SQL_META || + pCmd->command == TSDB_SQL_METRIC) { tscBuildMsg[pCmd->command](pSql, NULL); } int32_t code = tscSendMsgToServer(pSql); - - if (asyncFp) { - if (code != TSDB_CODE_SUCCESS) { - pRes->code = code; - tscQueueAsyncRes(pSql); - } - return 0; - } - if (code != TSDB_CODE_SUCCESS) { pRes->code = code; - return code; + tscQueueAsyncRes(pSql); } - - tsem_wait(&pSql->rspSem); - - if (pRes->code == TSDB_CODE_SUCCESS && tscProcessMsgRsp[pCmd->command]) (*tscProcessMsgRsp[pCmd->command])(pSql); - - tsem_post(&pSql->emptyRspSem); - - return pRes->code; + + return TSDB_CODE_SUCCESS; } int tscProcessSql(SSqlObj *pSql) { @@ -519,13 +388,13 @@ int tscProcessSql(SSqlObj *pSql) { SSqlCmd *pCmd = &pSql->cmd; SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - SMeterMetaInfo *pMeterMetaInfo = NULL; - int16_t type = 0; + STableMetaInfo *pTableMetaInfo = NULL; + uint16_t type = 0; if (pQueryInfo != NULL) { - pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - if (pMeterMetaInfo != NULL) { - name = pMeterMetaInfo->name; + pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + if (pTableMetaInfo != NULL) { + name = pTableMetaInfo->name; } type = pQueryInfo->type; @@ -536,21 +405,21 @@ int tscProcessSql(SSqlObj *pSql) { tscTrace("%p SQL cmd:%d will be processed, name:%s, type:%d", pSql, pCmd->command, name, type); if (pSql->cmd.command < TSDB_SQL_MGMT) { - // the pMeterMetaInfo cannot be NULL - if (pMeterMetaInfo == NULL) { + // the pTableMetaInfo cannot be NULL + if (pTableMetaInfo 
== NULL) { pSql->res.code = TSDB_CODE_OTHERS; return pSql->res.code; } // temp pSql->ipList = &tscMgmtIpList; -// if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { -// pSql->index = pMeterMetaInfo->pMeterMeta->index; +// if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { +// pSql->index = pTableMetaInfo->pTableMeta->index; // } else { // it must be the parent SSqlObj for super table query // if ((pQueryInfo->type & TSDB_QUERY_TYPE_SUBQUERY) != 0) { -// int32_t idx = pMeterMetaInfo->vnodeIndex; +// int32_t idx = pTableMetaInfo->vnodeIndex; // -// SVnodeSidList *pSidList = tscGetVnodeSidList(pMeterMetaInfo->pMetricMeta, idx); +// SVnodeSidList *pSidList = tscGetVnodeSidList(pTableMetaInfo->pMetricMeta, idx); // pSql->index = pSidList->index; // } // } @@ -563,42 +432,7 @@ int tscProcessSql(SSqlObj *pSql) { // todo handle async situation if (QUERY_IS_JOIN_QUERY(type)) { if ((pQueryInfo->type & TSDB_QUERY_TYPE_SUBQUERY) == 0) { - SSubqueryState *pState = calloc(1, sizeof(SSubqueryState)); - - pState->numOfTotal = pQueryInfo->numOfTables; - - for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { - SJoinSubquerySupporter *pSupporter = tscCreateJoinSupporter(pSql, pState, i); - - if (pSupporter == NULL) { // failed to create support struct, abort current query - tscError("%p tableIndex:%d, failed to allocate join support object, abort further query", pSql, i); - pState->numOfCompleted = pQueryInfo->numOfTables - i - 1; - pSql->res.code = TSDB_CODE_CLI_OUT_OF_MEMORY; - - return pSql->res.code; - } - - int32_t code = tscLaunchJoinSubquery(pSql, i, pSupporter); - if (code != TSDB_CODE_SUCCESS) { // failed to create subquery object, quit query - tscDestroyJoinSupporter(pSupporter); - pSql->res.code = TSDB_CODE_CLI_OUT_OF_MEMORY; - - break; - } - } - - tsem_post(&pSql->emptyRspSem); - tsem_wait(&pSql->rspSem); - - tsem_post(&pSql->emptyRspSem); - - if (pSql->numOfSubs <= 0) { - pSql->cmd.command = TSDB_SQL_RETRIEVE_EMPTY_RESULT; - } else { - pSql->cmd.command = 
TSDB_SQL_METRIC_JOIN_RETRIEVE; - } - - return TSDB_CODE_SUCCESS; + return tscHandleMasterJoinQuery(pSql); } else { // for first stage sub query, iterate all vnodes to get all timestamp if ((pQueryInfo->type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) != TSDB_QUERY_TYPE_JOIN_SEC_STAGE) { @@ -606,470 +440,23 @@ int tscProcessSql(SSqlObj *pSql) { } } } - - if (tscIsTwoStageMergeMetricQuery(pQueryInfo, 0)) { - /* - * (ref. line: 964) - * Before this function returns from tscLaunchSTableSubqueries and continues, pSql may have been released at user - * program context after retrieving all data from vnodes. User function is called at tscRetrieveFromVnodeCallBack. - * - * when pSql being released, pSql->fp == NULL, it may pass the check of pSql->fp == NULL, - * which causes deadlock. So we keep it as local variable. - */ - void *fp = pSql->fp; - - if (tscLaunchSTableSubqueries(pSql) != TSDB_CODE_SUCCESS) { - return pRes->code; - } - - if (fp == NULL) { - tsem_post(&pSql->emptyRspSem); - tsem_wait(&pSql->rspSem); - tsem_post(&pSql->emptyRspSem); - - // set the command flag must be after the semaphore been correctly set. 
- pSql->cmd.command = TSDB_SQL_RETRIEVE_METRIC; - } - - return pSql->res.code; - } - - return doProcessSql(pSql); -} - -static void doCleanupSubqueries(SSqlObj *pSql, int32_t numOfSubs, SSubqueryState* pState) { - assert(numOfSubs <= pSql->numOfSubs && numOfSubs >= 0 && pState != NULL); - for(int32_t i = 0; i < numOfSubs; ++i) { - SSqlObj* pSub = pSql->pSubs[i]; - assert(pSub != NULL); - - SRetrieveSupport* pSupport = pSub->param; - - tfree(pSupport->localBuffer); - - pthread_mutex_unlock(&pSupport->queryMutex); - pthread_mutex_destroy(&pSupport->queryMutex); - - tfree(pSupport); - - tscFreeSqlObj(pSub); - } - - free(pState); -} - -int tscLaunchSTableSubqueries(SSqlObj *pSql) { - SSqlRes *pRes = &pSql->res; - SSqlCmd *pCmd = &pSql->cmd; - - // pRes->code check only serves in launching metric sub-queries - if (pRes->code == TSDB_CODE_QUERY_CANCELLED) { - pCmd->command = TSDB_SQL_RETRIEVE_METRIC; // enable the abort of kill metric function. + if (tscIsTwoStageSTableQuery(pQueryInfo, 0)) { // super table query + tscHandleMasterSTableQuery(pSql); return pRes->code; + } else if (pSql->fp == (void(*)())tscHandleMultivnodeInsert) { // multi-vnodes insertion + tscHandleMultivnodeInsert(pSql); + return pSql->res.code; } - - tExtMemBuffer ** pMemoryBuf = NULL; - tOrderDescriptor *pDesc = NULL; - SColumnModel * pModel = NULL; - - pRes->qhandle = 1; // hack the qhandle check - - const uint32_t nBufferSize = (1 << 16); // 64KB - - SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - int32_t numOfSubQueries = pMeterMetaInfo->pMetricMeta->numOfVnodes; - assert(numOfSubQueries > 0); - - int32_t ret = tscLocalReducerEnvCreate(pSql, &pMemoryBuf, &pDesc, &pModel, nBufferSize); - if (ret != 0) { - pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; - if (pSql->fp) { - tscQueueAsyncRes(pSql); - } - return pRes->code; - } - - pSql->pSubs = calloc(numOfSubQueries, POINTER_BYTES); - 
pSql->numOfSubs = numOfSubQueries; - - tscTrace("%p retrieved query data from %d vnode(s)", pSql, numOfSubQueries); - SSubqueryState *pState = calloc(1, sizeof(SSubqueryState)); - pState->numOfTotal = numOfSubQueries; - pRes->code = TSDB_CODE_SUCCESS; - - int32_t i = 0; - for (; i < numOfSubQueries; ++i) { - SRetrieveSupport *trs = (SRetrieveSupport *)calloc(1, sizeof(SRetrieveSupport)); - if (trs == NULL) { - tscError("%p failed to malloc buffer for SRetrieveSupport, orderOfSub:%d, reason:%s", pSql, i, strerror(errno)); - break; - } - - trs->pExtMemBuffer = pMemoryBuf; - trs->pOrderDescriptor = pDesc; - trs->pState = pState; - - trs->localBuffer = (tFilePage *)calloc(1, nBufferSize + sizeof(tFilePage)); - if (trs->localBuffer == NULL) { - tscError("%p failed to malloc buffer for local buffer, orderOfSub:%d, reason:%s", pSql, i, strerror(errno)); - tfree(trs); - break; - } - - trs->subqueryIndex = i; - trs->pParentSqlObj = pSql; - trs->pFinalColModel = pModel; - - pthread_mutexattr_t mutexattr = {0}; - pthread_mutexattr_settype(&mutexattr, PTHREAD_MUTEX_RECURSIVE_NP); - pthread_mutex_init(&trs->queryMutex, &mutexattr); - pthread_mutexattr_destroy(&mutexattr); - - SSqlObj *pNew = tscCreateSqlObjForSubquery(pSql, trs, NULL); - if (pNew == NULL) { - tscError("%p failed to malloc buffer for subObj, orderOfSub:%d, reason:%s", pSql, i, strerror(errno)); - tfree(trs->localBuffer); - tfree(trs); - break; - } - - // todo handle multi-vnode situation - if (pQueryInfo->tsBuf) { - SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); - pNewQueryInfo->tsBuf = tsBufClone(pQueryInfo->tsBuf); - } - - tscTrace("%p sub:%p create subquery success. 
orderOfSub:%d", pSql, pNew, trs->subqueryIndex); - } - - if (i < numOfSubQueries) { - tscError("%p failed to prepare subquery structure and launch subqueries", pSql); - pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; - - tscLocalReducerEnvDestroy(pMemoryBuf, pDesc, pModel, numOfSubQueries); - doCleanupSubqueries(pSql, i, pState); - return pRes->code; // free all allocated resource - } - - if (pRes->code == TSDB_CODE_QUERY_CANCELLED) { - tscLocalReducerEnvDestroy(pMemoryBuf, pDesc, pModel, numOfSubQueries); - doCleanupSubqueries(pSql, i, pState); - return pRes->code; - } - - for(int32_t j = 0; j < numOfSubQueries; ++j) { - SSqlObj* pSub = pSql->pSubs[j]; - SRetrieveSupport* pSupport = pSub->param; - - tscTrace("%p sub:%p launch subquery, orderOfSub:%d.", pSql, pSub, pSupport->subqueryIndex); - tscProcessSql(pSub); - } - - return TSDB_CODE_SUCCESS; -} - -static void tscFreeSubSqlObj(SRetrieveSupport *trsupport, SSqlObj *pSql) { - tscTrace("%p start to free subquery result", pSql); - - if (pSql->res.code == TSDB_CODE_SUCCESS) { - taos_free_result(pSql); - } - - tfree(trsupport->localBuffer); - - pthread_mutex_unlock(&trsupport->queryMutex); - pthread_mutex_destroy(&trsupport->queryMutex); - - tfree(trsupport); -} - -static void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows); - -static void tscAbortFurtherRetryRetrieval(SRetrieveSupport *trsupport, TAOS_RES *tres, int32_t errCode) { -// set no disk space error info -#ifdef WINDOWS - LPVOID lpMsgBuf; - FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, - GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language - (LPTSTR)&lpMsgBuf, 0, NULL); - tscError("sub:%p failed to flush data to disk:reason:%s", tres, lpMsgBuf); - LocalFree(lpMsgBuf); -#else - char buf[256] = {0}; - strerror_r(errno, buf, 256); - tscError("sub:%p failed to flush data to disk:reason:%s", tres, buf); -#endif - - trsupport->pState->code = -errCode; - 
trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; - - pthread_mutex_unlock(&trsupport->queryMutex); - - tscRetrieveFromVnodeCallBack(trsupport, tres, trsupport->pState->code); -} - -static void tscHandleSubRetrievalError(SRetrieveSupport *trsupport, SSqlObj *pSql, int numOfRows) { - SSqlObj *pPObj = trsupport->pParentSqlObj; - int32_t subqueryIndex = trsupport->subqueryIndex; - - assert(pSql != NULL); - SSubqueryState* pState = trsupport->pState; - assert(pState->numOfCompleted < pState->numOfTotal && pState->numOfCompleted >= 0 && - pPObj->numOfSubs == pState->numOfTotal); - - /* retrieved in subquery failed. OR query cancelled in retrieve phase. */ - if (pState->code == TSDB_CODE_SUCCESS && pPObj->res.code != TSDB_CODE_SUCCESS) { - pState->code = -(int)pPObj->res.code; - - /* - * kill current sub-query connection, which may retrieve data from vnodes; - * Here we get: pPObj->res.code == TSDB_CODE_QUERY_CANCELLED - */ - pSql->res.numOfRows = 0; - trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; // disable retry efforts - tscTrace("%p query is cancelled, sub:%p, orderOfSub:%d abort retrieve, code:%d", trsupport->pParentSqlObj, pSql, - subqueryIndex, pState->code); - } - - if (numOfRows >= 0) { // current query is successful, but other sub query failed, still abort current query. 
- tscTrace("%p sub:%p retrieve numOfRows:%d,orderOfSub:%d", pPObj, pSql, numOfRows, subqueryIndex); - tscError("%p sub:%p abort further retrieval due to other queries failure,orderOfSub:%d,code:%d", pPObj, pSql, - subqueryIndex, pState->code); - } else { - if (trsupport->numOfRetry++ < MAX_NUM_OF_SUBQUERY_RETRY && pState->code == TSDB_CODE_SUCCESS) { - /* - * current query failed, and the retry count is less than the available - * count, retry query clear previous retrieved data, then launch a new sub query - */ - tExtMemBufferClear(trsupport->pExtMemBuffer[subqueryIndex]); - - // clear local saved number of results - trsupport->localBuffer->numOfElems = 0; - pthread_mutex_unlock(&trsupport->queryMutex); - - tscTrace("%p sub:%p retrieve failed, code:%d, orderOfSub:%d, retry:%d", trsupport->pParentSqlObj, pSql, numOfRows, - subqueryIndex, trsupport->numOfRetry); - - SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSqlObj, trsupport, pSql); - if (pNew == NULL) { - tscError("%p sub:%p failed to create new subquery sqlobj due to out of memory, abort retry", - trsupport->pParentSqlObj, pSql); - - pState->code = TSDB_CODE_CLI_OUT_OF_MEMORY; - trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; - return; - } - - tscProcessSql(pNew); - return; - } else { // reach the maximum retry count, abort - atomic_val_compare_exchange_32(&pState->code, TSDB_CODE_SUCCESS, numOfRows); - tscError("%p sub:%p retrieve failed,code:%d,orderOfSub:%d failed.no more retry,set global code:%d", pPObj, pSql, - numOfRows, subqueryIndex, pState->code); - } - } - - int32_t numOfTotal = pState->numOfTotal; - - int32_t finished = atomic_add_fetch_32(&pState->numOfCompleted, 1); - if (finished < numOfTotal) { - tscTrace("%p sub:%p orderOfSub:%d freed, finished subqueries:%d", pPObj, pSql, trsupport->subqueryIndex, finished); - return tscFreeSubSqlObj(trsupport, pSql); - } - - // all subqueries are failed - tscError("%p retrieve from %d vnode(s) completed,code:%d.FAILED.", pPObj, 
pState->numOfTotal, pState->code); - pPObj->res.code = -(pState->code); - - // release allocated resource - tscLocalReducerEnvDestroy(trsupport->pExtMemBuffer, trsupport->pOrderDescriptor, trsupport->pFinalColModel, - pState->numOfTotal); - - tfree(trsupport->pState); - tscFreeSubSqlObj(trsupport, pSql); - - // sync query, wait for the master SSqlObj to proceed - if (pPObj->fp == NULL) { - // sync query, wait for the master SSqlObj to proceed - tsem_wait(&pPObj->emptyRspSem); - tsem_wait(&pPObj->emptyRspSem); - - tsem_post(&pPObj->rspSem); - - pPObj->cmd.command = TSDB_SQL_RETRIEVE_METRIC; - } else { - // in case of second stage join subquery, invoke its callback function instead of regular QueueAsyncRes - SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pPObj->cmd, 0); - - if ((pQueryInfo->type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) == TSDB_QUERY_TYPE_JOIN_SEC_STAGE) { - (*pPObj->fp)(pPObj->param, pPObj, pPObj->res.code); - } else { // regular super table query - if (pPObj->res.code != TSDB_CODE_SUCCESS) { - tscQueueAsyncRes(pPObj); - } - } - } -} - -void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { - SRetrieveSupport *trsupport = (SRetrieveSupport *)param; - int32_t idx = trsupport->subqueryIndex; - SSqlObj * pPObj = trsupport->pParentSqlObj; - tOrderDescriptor *pDesc = trsupport->pOrderDescriptor; - - SSqlObj *pSql = (SSqlObj *)tres; - if (pSql == NULL) { // sql object has been released in error process, return immediately - tscTrace("%p subquery has been released, idx:%d, abort", pPObj, idx); - return; - } - - SSubqueryState* pState = trsupport->pState; - assert(pState->numOfCompleted < pState->numOfTotal && pState->numOfCompleted >= 0 && - pPObj->numOfSubs == pState->numOfTotal); - // query process and cancel query process may execute at the same time - pthread_mutex_lock(&trsupport->queryMutex); - - if (numOfRows < 0 || pState->code < 0 || pPObj->res.code != TSDB_CODE_SUCCESS) { - return tscHandleSubRetrievalError(trsupport, pSql, 
numOfRows); - } - - SSqlRes * pRes = &pSql->res; - SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - - SVnodeSidList *vnodeInfo = tscGetVnodeSidList(pMeterMetaInfo->pMetricMeta, idx); - SVPeerDesc * pSvd = &vnodeInfo->vpeerDesc[vnodeInfo->index]; - - if (numOfRows > 0) { - assert(pRes->numOfRows == numOfRows); - int64_t num = atomic_add_fetch_64(&pState->numOfRetrievedRows, numOfRows); - - tscTrace("%p sub:%p retrieve numOfRows:%d totalNumOfRows:%d from ip:%u,vid:%d,orderOfSub:%d", pPObj, pSql, - pRes->numOfRows, pState->numOfRetrievedRows, pSvd->ip, pSvd->vnode, idx); - - if (num > tsMaxNumOfOrderedResults && tscIsProjectionQueryOnSTable(pQueryInfo, 0)) { - tscError("%p sub:%p num of OrderedRes is too many, max allowed:%" PRId64 " , current:%" PRId64, - pPObj, pSql, tsMaxNumOfOrderedResults, num); - tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_SORTED_RES_TOO_MANY); - return; - } - - -#ifdef _DEBUG_VIEW - printf("received data from vnode: %d rows\n", pRes->numOfRows); - SSrcColumnInfo colInfo[256] = {0}; - - tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pColumnModel, pRes->data, pRes->numOfRows, pRes->numOfRows, colInfo); -#endif - if (tsTotalTmpDirGB != 0 && tsAvailTmpDirGB < tsMinimalTmpDirGB) { - tscError("%p sub:%p client disk space remain %.3f GB, need at least %.3f GB, stop query", pPObj, pSql, - tsAvailTmpDirGB, tsMinimalTmpDirGB); - tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); - return; - } - - int32_t ret = saveToBuffer(trsupport->pExtMemBuffer[idx], pDesc, trsupport->localBuffer, pRes->data, - pRes->numOfRows, pQueryInfo->groupbyExpr.orderType); - if (ret < 0) { - // set no disk space error info, and abort retry - tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); - } else { - pthread_mutex_unlock(&trsupport->queryMutex); - taos_fetch_rows_a(tres, 
tscRetrieveFromVnodeCallBack, param); - } - - } else { // all data has been retrieved to client - /* data in from current vnode is stored in cache and disk */ - uint32_t numOfRowsFromVnode = trsupport->pExtMemBuffer[idx]->numOfTotalElems + trsupport->localBuffer->numOfElems; - tscTrace("%p sub:%p all data retrieved from ip:%u,vid:%d, numOfRows:%d, orderOfSub:%d", pPObj, pSql, pSvd->ip, - pSvd->vnode, numOfRowsFromVnode, idx); - - tColModelCompact(pDesc->pColumnModel, trsupport->localBuffer, pDesc->pColumnModel->capacity); - -#ifdef _DEBUG_VIEW - printf("%" PRIu64 " rows data flushed to disk:\n", trsupport->localBuffer->numOfElems); - SSrcColumnInfo colInfo[256] = {0}; - tscGetSrcColumnInfo(colInfo, pQueryInfo); - tColModelDisplayEx(pDesc->pColumnModel, trsupport->localBuffer->data, trsupport->localBuffer->numOfElems, - trsupport->localBuffer->numOfElems, colInfo); -#endif - - if (tsTotalTmpDirGB != 0 && tsAvailTmpDirGB < tsMinimalTmpDirGB) { - tscError("%p sub:%p client disk space remain %.3f GB, need at least %.3f GB, stop query", pPObj, pSql, - tsAvailTmpDirGB, tsMinimalTmpDirGB); - tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); - return; - } - - // each result for a vnode is ordered as an independant list, - // then used as an input of loser tree for disk-based merge routine - int32_t ret = tscFlushTmpBuffer(trsupport->pExtMemBuffer[idx], pDesc, trsupport->localBuffer, - pQueryInfo->groupbyExpr.orderType); - if (ret != 0) { - /* set no disk space error info, and abort retry */ - return tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); - } - - // keep this value local variable, since the pState variable may be released by other threads, if atomic_add opertion - // increases the finished value up to pState->numOfTotal value, which means all subqueries are completed. - // In this case, the comparsion between finished value and released pState->numOfTotal is not safe. 
- int32_t numOfTotal = pState->numOfTotal; - - int32_t finished = atomic_add_fetch_32(&pState->numOfCompleted, 1); - if (finished < numOfTotal) { - tscTrace("%p sub:%p orderOfSub:%d freed, finished subqueries:%d", pPObj, pSql, trsupport->subqueryIndex, finished); - return tscFreeSubSqlObj(trsupport, pSql); - } - - // all sub-queries are returned, start to local merge process - pDesc->pColumnModel->capacity = trsupport->pExtMemBuffer[idx]->numOfElemsPerPage; - - tscTrace("%p retrieve from %d vnodes completed.final NumOfRows:%d,start to build loser tree", pPObj, - pState->numOfTotal, pState->numOfRetrievedRows); - - SQueryInfo *pPQueryInfo = tscGetQueryInfoDetail(&pPObj->cmd, 0); - tscClearInterpInfo(pPQueryInfo); - - tscCreateLocalReducer(trsupport->pExtMemBuffer, pState->numOfTotal, pDesc, trsupport->pFinalColModel, - &pPObj->cmd, &pPObj->res); - tscTrace("%p build loser tree completed", pPObj); - - pPObj->res.precision = pSql->res.precision; - pPObj->res.numOfRows = 0; - pPObj->res.row = 0; - - // only free once - tfree(trsupport->pState); - - tscFreeSubSqlObj(trsupport, pSql); - - if (pPObj->fp == NULL) { - tsem_wait(&pPObj->emptyRspSem); - tsem_wait(&pPObj->emptyRspSem); - - tsem_post(&pPObj->rspSem); - } else { - // set the command flag must be after the semaphore been correctly set. 
- pPObj->cmd.command = TSDB_SQL_RETRIEVE_METRIC; - if (pPObj->res.code == TSDB_CODE_SUCCESS) { - (*pPObj->fp)(pPObj->param, pPObj, 0); - } else { - tscQueueAsyncRes(pPObj); - } - } - } + return doProcessSql(pSql); } void tscKillMetricQuery(SSqlObj *pSql) { SSqlCmd* pCmd = &pSql->cmd; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - if (!tscIsTwoStageMergeMetricQuery(pQueryInfo, 0)) { + if (!tscIsTwoStageSTableQuery(pQueryInfo, 0)) { return; } @@ -1107,117 +494,6 @@ void tscKillMetricQuery(SSqlObj *pSql) { tscTrace("%p metric query is cancelled", pSql); } -static void tscRetrieveDataRes(void *param, TAOS_RES *tres, int retCode); - -static SSqlObj *tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj *prevSqlObj) { - const int32_t table_index = 0; - - SSqlObj *pNew = createSubqueryObj(pSql, table_index, tscRetrieveDataRes, trsupport, prevSqlObj); - if (pNew != NULL) { // the sub query of two-stage super table query - SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); - pQueryInfo->type |= TSDB_QUERY_TYPE_STABLE_SUBQUERY; - - assert(pQueryInfo->numOfTables == 1 && pNew->cmd.numOfClause == 1); - - // launch subquery for each vnode, so the subquery index equals to the vnodeIndex. 
- SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, table_index); - pMeterMetaInfo->vnodeIndex = trsupport->subqueryIndex; - - pSql->pSubs[trsupport->subqueryIndex] = pNew; - } - - return pNew; -} - -void tscRetrieveDataRes(void *param, TAOS_RES *tres, int code) { - SRetrieveSupport *trsupport = (SRetrieveSupport *)param; - - SSqlObj* pParentSql = trsupport->pParentSqlObj; - SSqlObj* pSql = (SSqlObj *)tres; - - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); - assert(pSql->cmd.numOfClause == 1 && pSql->cmd.pQueryInfo[0]->numOfTables == 1); - - int32_t idx = pMeterMetaInfo->vnodeIndex; - - SVnodeSidList *vnodeInfo = NULL; - SVPeerDesc * pSvd = NULL; - if (pMeterMetaInfo->pMetricMeta != NULL) { - vnodeInfo = tscGetVnodeSidList(pMeterMetaInfo->pMetricMeta, idx); - pSvd = &vnodeInfo->vpeerDesc[vnodeInfo->index]; - } - - SSubqueryState* pState = trsupport->pState; - assert(pState->numOfCompleted < pState->numOfTotal && pState->numOfCompleted >= 0 && - pParentSql->numOfSubs == pState->numOfTotal); - - if (pParentSql->res.code != TSDB_CODE_SUCCESS || pState->code != TSDB_CODE_SUCCESS) { - // metric query is killed, Note: code must be less than 0 - trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; - if (pParentSql->res.code != TSDB_CODE_SUCCESS) { - code = -(int)(pParentSql->res.code); - } else { - code = pState->code; - } - tscTrace("%p query cancelled or failed, sub:%p, orderOfSub:%d abort, code:%d", pParentSql, pSql, - trsupport->subqueryIndex, code); - } - - /* - * if a query on a vnode is failed, all retrieve operations from vnode that occurs later - * than this one are actually not necessary, we simply call the tscRetrieveFromVnodeCallBack - * function to abort current and remain retrieve process. - * - * NOTE: threadsafe is required. 
- */ - if (code != TSDB_CODE_SUCCESS) { - if (trsupport->numOfRetry++ >= MAX_NUM_OF_SUBQUERY_RETRY) { - tscTrace("%p sub:%p reach the max retry count,set global code:%d", pParentSql, pSql, code); - atomic_val_compare_exchange_32(&pState->code, 0, code); - } else { // does not reach the maximum retry count, go on - tscTrace("%p sub:%p failed code:%d, retry:%d", pParentSql, pSql, code, trsupport->numOfRetry); - - SSqlObj *pNew = tscCreateSqlObjForSubquery(pParentSql, trsupport, pSql); - if (pNew == NULL) { - tscError("%p sub:%p failed to create new subquery due to out of memory, abort retry, vid:%d, orderOfSub:%d", - trsupport->pParentSqlObj, pSql, pSvd != NULL ? pSvd->vnode : -1, trsupport->subqueryIndex); - - pState->code = -TSDB_CODE_CLI_OUT_OF_MEMORY; - trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; - } else { - SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); - assert(pNewQueryInfo->pMeterInfo[0]->pMeterMeta != NULL && pNewQueryInfo->pMeterInfo[0]->pMetricMeta != NULL); - tscProcessSql(pNew); - return; - } - } - } - - if (pState->code != TSDB_CODE_SUCCESS) { // failed, abort - if (vnodeInfo != NULL) { - tscTrace("%p sub:%p query failed,ip:%u,vid:%d,orderOfSub:%d,global code:%d", pParentSql, pSql, - vnodeInfo->vpeerDesc[vnodeInfo->index].ip, vnodeInfo->vpeerDesc[vnodeInfo->index].vnode, - trsupport->subqueryIndex, pState->code); - } else { - tscTrace("%p sub:%p query failed,orderOfSub:%d,global code:%d", pParentSql, pSql, - trsupport->subqueryIndex, pState->code); - } - - tscRetrieveFromVnodeCallBack(param, tres, pState->code); - } else { // success, proceed to retrieve data from dnode - if (vnodeInfo != NULL) { - tscTrace("%p sub:%p query complete,ip:%u,vid:%d,orderOfSub:%d,retrieve data", trsupport->pParentSqlObj, pSql, - vnodeInfo->vpeerDesc[vnodeInfo->index].ip, vnodeInfo->vpeerDesc[vnodeInfo->index].vnode, - trsupport->subqueryIndex); - } else { - tscTrace("%p sub:%p query complete, orderOfSub:%d,retrieve data", trsupport->pParentSqlObj, 
pSql, - trsupport->subqueryIndex); - } - - taos_fetch_rows_a(tres, tscRetrieveFromVnodeCallBack, param); - } -} - int tscBuildRetrieveMsg(SSqlObj *pSql, SSqlInfo *pInfo) { char *pMsg, *pStart; @@ -1241,16 +517,16 @@ int tscBuildRetrieveMsg(SSqlObj *pSql, SSqlInfo *pInfo) { void tscUpdateVnodeInSubmitMsg(SSqlObj *pSql, char *buf) { //SShellSubmitMsg *pShellMsg; //char * pMsg; - //SMeterMetaInfo * pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, pSql->cmd.clauseIndex, 0); + //STableMetaInfo * pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, pSql->cmd.clauseIndex, 0); - //STableMeta *pMeterMeta = pMeterMetaInfo->pMeterMeta; + //STableMeta *pTableMeta = pTableMetaInfo->pTableMeta; //pMsg = buf + tsRpcHeadSize; //TODO set iplist //pShellMsg = (SShellSubmitMsg *)pMsg; - //pShellMsg->vnode = htons(pMeterMeta->vpeerDesc[pSql->index].vnode); - //tscTrace("%p update submit msg vnode:%s:%d", pSql, taosIpStr(pMeterMeta->vpeerDesc[pSql->index].ip), + //pShellMsg->vnode = htons(pTableMeta->vpeerDesc[pSql->index].vnode); + //tscTrace("%p update submit msg vnode:%s:%d", pSql, taosIpStr(pTableMeta->vpeerDesc[pSql->index].ip), // htons(pShellMsg->vnode)); } @@ -1258,26 +534,28 @@ int tscBuildSubmitMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SShellSubmitMsg *pShellMsg; char * pMsg, *pStart; - SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - - STableMeta *pMeterMeta = pMeterMetaInfo->pMeterMeta; - + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); + STableMeta* pTableMeta = tscGetMetaInfo(pQueryInfo, 0)->pTableMeta; + pStart = pSql->cmd.payload + tsRpcHeadSize; pMsg = pStart; pShellMsg = (SShellSubmitMsg *)pMsg; - + + pShellMsg->desc.numOfVnodes = htonl(1); + pShellMsg->import = htons(TSDB_QUERY_HAS_TYPE(pQueryInfo->type, TSDB_QUERY_TYPE_INSERT) ? 
0 : 1); - pShellMsg->vnode = 0; //htons(pMeterMeta->vpeerDesc[pMeterMeta->index].vnode); - pShellMsg->numOfSid = htonl(pSql->cmd.numOfTablesInSubmit); // number of meters to be inserted + pShellMsg->header.vgId = htonl(pTableMeta->vgId); + pShellMsg->header.contLen = htonl(pSql->cmd.payloadLen); + + pShellMsg->numOfTables = htonl(pSql->cmd.numOfTablesInSubmit); // number of meters to be inserted // pSql->cmd.payloadLen is set during parse sql routine, so we do not use it here pSql->cmd.msgType = TSDB_MSG_TYPE_SUBMIT; - tscTrace("%p update submit msg vnode:%s:%d", pSql, taosIpStr(pMeterMeta->vpeerDesc[pMeterMeta->index].ip), - htons(pShellMsg->vnode)); +// tscTrace("%p update submit msg vnode:%s:%d", pSql, taosIpStr(pTableMeta->vpeerDesc[pTableMeta->index].ip), +// htons(pShellMsg->vnode)); - pSql->cmd.payloadLen = sizeof(SShellSubmitMsg); +// pSql->cmd.payloadLen = sizeof(SShellSubmitMsg); return TSDB_CODE_SUCCESS; } @@ -1285,17 +563,17 @@ int tscBuildSubmitMsg(SSqlObj *pSql, SSqlInfo *pInfo) { void tscUpdateVnodeInQueryMsg(SSqlObj *pSql, char *buf) { //TODO // SSqlCmd * pCmd = &pSql->cmd; -// SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); +// STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); // // char * pStart = buf + tsRpcHeadSize; // SQueryTableMsg *pQueryMsg = (SQueryTableMsg *)pStart; // -// if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { // pColumnModel == NULL, query on meter -// STableMeta *pMeterMeta = pMeterMetaInfo->pMeterMeta; -// pQueryMsg->vnode = htons(pMeterMeta->vpeerDesc[pSql->index].vnode); +// if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { // pColumnModel == NULL, query on meter +// STableMeta *pTableMeta = pTableMetaInfo->pTableMeta; +// pQueryMsg->vnode = htons(pTableMeta->vpeerDesc[pSql->index].vnode); // } else { // query on metric -// SSuperTableMeta * pMetricMeta = pMeterMetaInfo->pMetricMeta; -// SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, 
pMeterMetaInfo->vnodeIndex); +// SSuperTableMeta * pMetricMeta = pTableMetaInfo->pMetricMeta; +// SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, pTableMetaInfo->vnodeIndex); // pQueryMsg->vnode = htons(pVnodeSidList->vpeerDesc[pSql->index].vnode); // } } @@ -1311,17 +589,17 @@ static int32_t tscEstimateQueryMsgSize(SSqlCmd *pCmd, int32_t clauseIndex) { int32_t srcColListSize = pQueryInfo->colList.numOfCols * sizeof(SColumnInfo); int32_t exprSize = sizeof(SSqlFuncExprMsg) * pQueryInfo->exprsInfo.numOfExprs; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); // meter query without tags values - if (!UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (!UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { return MIN_QUERY_MSG_PKT_SIZE + minMsgSize() + sizeof(SQueryTableMsg) + srcColListSize + exprSize; } - SSuperTableMeta *pMetricMeta = pMeterMetaInfo->pMetricMeta; - SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, pMeterMetaInfo->vnodeIndex); + SSuperTableMeta *pMetricMeta = pTableMetaInfo->pMetricMeta; + SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, pTableMetaInfo->vnodeIndex); - int32_t meterInfoSize = (pMetricMeta->tagLen + sizeof(STableSidExtInfo)) * pVnodeSidList->numOfSids; + int32_t meterInfoSize = (pMetricMeta->tagLen + sizeof(STableIdInfo)) * pVnodeSidList->numOfSids; int32_t outputColumnSize = pQueryInfo->exprsInfo.numOfExprs * sizeof(SSqlFuncExprMsg); int32_t size = meterInfoSize + outputColumnSize + srcColListSize + exprSize + MIN_QUERY_MSG_PKT_SIZE; @@ -1332,37 +610,34 @@ static int32_t tscEstimateQueryMsgSize(SSqlCmd *pCmd, int32_t clauseIndex) { return size; } -static char *doSerializeTableInfo(SSqlObj *pSql, int32_t numOfTables, int32_t vnodeId, char *pMsg) { - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, pSql->cmd.clauseIndex, 0); +static char *doSerializeTableInfo(SSqlObj *pSql, int32_t 
numOfTables, int32_t vgId, char *pMsg) { + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, pSql->cmd.clauseIndex, 0); - STableMeta * pMeterMeta = pMeterMetaInfo->pMeterMeta; - SSuperTableMeta *pMetricMeta = pMeterMetaInfo->pMetricMeta; + STableMeta * pTableMeta = pTableMetaInfo->pTableMeta; + SSuperTableMeta *pMetricMeta = pTableMetaInfo->pMetricMeta; - tscTrace("%p vid:%d, query on %d meters", pSql, vnodeId, numOfTables); - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { + tscTrace("%p vgId:%d, query on %d tables", pSql, vgId, numOfTables); + if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { #ifdef _DEBUG_VIEW - tscTrace("%p sid:%d, uid:%" PRIu64, pSql, pMeterMetaInfo->pMeterMeta->sid, pMeterMetaInfo->pMeterMeta->uid); + tscTrace("%p sid:%d, uid:%" PRIu64, pSql, pTableMetaInfo->pTableMeta->sid, pTableMetaInfo->pTableMeta->uid); #endif - STableSidExtInfo *pMeterInfo = (STableSidExtInfo *)pMsg; - pMeterInfo->sid = htonl(pMeterMeta->sid); - pMeterInfo->uid = htobe64(pMeterMeta->uid); - pMeterInfo->key = htobe64(tscGetSubscriptionProgress(pSql->pSubscription, pMeterMeta->uid)); - pMsg += sizeof(STableSidExtInfo); + STableIdInfo *pTableIdInfo = (STableIdInfo *)pMsg; + pTableIdInfo->sid = htonl(pTableMeta->sid); + pTableIdInfo->uid = htobe64(pTableMeta->uid); + pTableIdInfo->key = htobe64(tscGetSubscriptionProgress(pSql->pSubscription, pTableMeta->uid)); + pMsg += sizeof(STableIdInfo); } else { - SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, pMeterMetaInfo->vnodeIndex); + SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, pTableMetaInfo->vnodeIndex); for (int32_t i = 0; i < numOfTables; ++i) { - STableSidExtInfo *pMeterInfo = (STableSidExtInfo *)pMsg; - STableSidExtInfo *pQueryMeterInfo = tscGetMeterSidInfo(pVnodeSidList, i); + STableIdInfo *pTableIdInfo = (STableIdInfo *)pMsg; + STableIdInfo *pQueryMeterInfo = tscGetMeterSidInfo(pVnodeSidList, i); - pMeterInfo->sid = htonl(pQueryMeterInfo->sid); - pMeterInfo->uid = 
htobe64(pQueryMeterInfo->uid); - pMeterInfo->key = htobe64(tscGetSubscriptionProgress(pSql->pSubscription, pQueryMeterInfo->uid)); + pTableIdInfo->sid = htonl(pQueryMeterInfo->sid); + pTableIdInfo->uid = htobe64(pQueryMeterInfo->uid); + pTableIdInfo->key = htobe64(tscGetSubscriptionProgress(pSql->pSubscription, pQueryMeterInfo->uid)); - pMsg += sizeof(STableSidExtInfo); - - memcpy(pMsg, pQueryMeterInfo->tags, pMetricMeta->tagLen); - pMsg += pMetricMeta->tagLen; + pMsg += sizeof(STableIdInfo); #ifdef _DEBUG_VIEW tscTrace("%p sid:%d, uid:%" PRId64, pSql, pQueryMeterInfo->sid, pQueryMeterInfo->uid); @@ -1384,34 +659,33 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { } SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - char * pStart = pCmd->payload + tsRpcHeadSize; + char *pStart = pCmd->payload + tsRpcHeadSize; - STableMeta * pMeterMeta = pMeterMetaInfo->pMeterMeta; - SSuperTableMeta *pMetricMeta = pMeterMetaInfo->pMetricMeta; + STableMeta * pTableMeta = pTableMetaInfo->pTableMeta; + SSuperTableMeta *pMetricMeta = pTableMetaInfo->pMetricMeta; SQueryTableMsg *pQueryMsg = (SQueryTableMsg *)pStart; int32_t msgLen = 0; int32_t numOfTables = 0; - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { + if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { numOfTables = 1; - tscTrace("%p query on vnode: %d, number of sid:%d, meter id: %s", pSql, - pMeterMeta->vpeerDesc[pMeterMeta->index].vnode, 1, pMeterMetaInfo->name); - - pQueryMsg->vnode = htons(pMeterMeta->vpeerDesc[pMeterMeta->index].vnode); - pQueryMsg->uid = pMeterMeta->uid; + pQueryMsg->uid = pTableMeta->uid; pQueryMsg->numOfTagsCols = 0; + + pQueryMsg->vgId = htonl(pTableMeta->vgId); + tscTrace("%p queried tables:%d, table id: %s", pSql, 1, pTableMetaInfo->name); } else { // query on super table - if (pMeterMetaInfo->vnodeIndex < 0) { - 
tscError("%p error vnodeIdx:%d", pSql, pMeterMetaInfo->vnodeIndex); + if (pTableMetaInfo->vnodeIndex < 0) { + tscError("%p error vnodeIdx:%d", pSql, pTableMetaInfo->vnodeIndex); return -1; } - SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, pMeterMetaInfo->vnodeIndex); + SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, pTableMetaInfo->vnodeIndex); uint32_t vnodeId = pVnodeSidList->vpeerDesc[pVnodeSidList->index].vnode; numOfTables = pVnodeSidList->numOfSids; @@ -1420,19 +694,19 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { return -1; // error } - tscTrace("%p query on vid:%d, number of sid:%d", pSql, vnodeId, numOfTables); - pQueryMsg->vnode = htons(vnodeId); + tscTrace("%p query on vid:%d, number of tables:%d", pSql, vnodeId, numOfTables); + pQueryMsg->vgId = htons(vnodeId); } - pQueryMsg->numOfSids = htonl(numOfTables); - pQueryMsg->numOfTagsCols = htons(pMeterMetaInfo->numOfTags); + pQueryMsg->numOfTables = htonl(numOfTables); + pQueryMsg->numOfTagsCols = htons(pTableMetaInfo->numOfTags); if (pQueryInfo->order.order == TSQL_SO_ASC) { - pQueryMsg->skey = htobe64(pQueryInfo->stime); - pQueryMsg->ekey = htobe64(pQueryInfo->etime); + pQueryMsg->window.skey = htobe64(pQueryInfo->stime); + pQueryMsg->window.ekey = htobe64(pQueryInfo->etime); } else { - pQueryMsg->skey = htobe64(pQueryInfo->etime); - pQueryMsg->ekey = htobe64(pQueryInfo->stime); + pQueryMsg->window.skey = htobe64(pQueryInfo->etime); + pQueryMsg->window.ekey = htobe64(pQueryInfo->stime); } pQueryMsg->order = htons(pQueryInfo->order.order); @@ -1446,17 +720,12 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pQueryMsg->numOfCols = htons(pQueryInfo->colList.numOfCols); if (pQueryInfo->colList.numOfCols <= 0) { - tscError("%p illegal value of numOfCols in query msg: %d", pSql, pMeterMeta->numOfColumns); - return -1; - } - - if (pMeterMeta->numOfTags < 0) { - tscError("%p illegal value of numOfTagsCols in query msg: %d", pSql, pMeterMeta->numOfTags); + 
tscError("%p illegal value of numOfCols in query msg: %d", pSql, tscGetNumOfColumns(pTableMeta)); return -1; } pQueryMsg->intervalTime = htobe64(pQueryInfo->intervalTime); - pQueryMsg->intervalTimeUnit = pQueryInfo->intervalTimeUnit; + pQueryMsg->slidingTimeUnit = pQueryInfo->slidingTimeUnit; pQueryMsg->slidingTime = htobe64(pQueryInfo->slidingTime); if (pQueryInfo->intervalTime < 0) { @@ -1471,9 +740,9 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pQueryMsg->numOfGroupCols = htons(pQueryInfo->groupbyExpr.numOfGroupCols); - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { // query on meter + if (UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { // query on meter pQueryMsg->tagLength = 0; - } else { // query on metric + } else { // query on super table pQueryMsg->tagLength = htons(pMetricMeta->tagLen); } @@ -1488,20 +757,20 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { // set column list ids char * pMsg = (char *)(pQueryMsg->colList) + pQueryInfo->colList.numOfCols * sizeof(SColumnInfo); - SSchema *pSchema = tsGetSchema(pMeterMeta); + SSchema *pSchema = tscGetTableSchema(pTableMeta); for (int32_t i = 0; i < pQueryInfo->colList.numOfCols; ++i) { SColumnBase *pCol = tscColumnBaseInfoGet(&pQueryInfo->colList, i); SSchema * pColSchema = &pSchema[pCol->colIndex.columnIndex]; - if (pCol->colIndex.columnIndex >= pMeterMeta->numOfColumns || pColSchema->type < TSDB_DATA_TYPE_BOOL || - pColSchema->type > TSDB_DATA_TYPE_NCHAR) { - tscError("%p vid:%d sid:%d id:%s, column index out of range, numOfColumns:%d, index:%d, column name:%s", pSql, - htons(pQueryMsg->vnode), pMeterMeta->sid, pMeterMetaInfo->name, pMeterMeta->numOfColumns, pCol->colIndex, - pColSchema->name); - - return -1; // 0 means build msg failed - } +// if (pCol->colIndex.columnIndex >= tscGetNumOfColumns(pTableMeta) || pColSchema->type < TSDB_DATA_TYPE_BOOL || +// pColSchema->type > TSDB_DATA_TYPE_NCHAR) { +// tscError("%p vid:%d sid:%d id:%s, column index out of range, numOfColumns:%d, 
index:%d, column name:%s", pSql, +// htons(pQueryMsg->vnode), pTableMeta->sid, pTableMetaInfo->name, tscGetNumOfColumns(pTableMeta), pCol->colIndex, +// pColSchema->name); +// +// return -1; // 0 means build msg failed +// } pQueryMsg->colList[i].colId = htons(pColSchema->colId); pQueryMsg->colList[i].bytes = htons(pColSchema->bytes); @@ -1547,7 +816,7 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { hasArithmeticFunction = true; } - if (!tscValidateColumnId(pMeterMetaInfo, pExpr->colInfo.colId)) { + if (!tscValidateColumnId(pTableMetaInfo, pExpr->colInfo.colId)) { /* column id is not valid according to the cached metermeta, the meter meta is expired */ tscError("%p table schema is not matched with parsed sql", pSql); return -1; @@ -1595,20 +864,20 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pQueryMsg->colNameLen = htonl(len); // serialize the table info (sid, uid, tags) - pMsg = doSerializeTableInfo(pSql, numOfTables, htons(pQueryMsg->vnode), pMsg); + pMsg = doSerializeTableInfo(pSql, numOfTables, htons(pQueryMsg->vgId), pMsg); // only include the required tag column schema. 
If a tag is not required, it won't be sent to vnode - if (pMeterMetaInfo->numOfTags > 0) { + if (pTableMetaInfo->numOfTags > 0) { // always transfer tag schema to vnode if exists - SSchema *pTagSchema = tsGetTagSchema(pMeterMeta); + SSchema *pTagSchema = tscGetTableTagSchema(pTableMeta); - for (int32_t j = 0; j < pMeterMetaInfo->numOfTags; ++j) { - if (pMeterMetaInfo->tagColumnIndex[j] == TSDB_TBNAME_COLUMN_INDEX) { + for (int32_t j = 0; j < pTableMetaInfo->numOfTags; ++j) { + if (pTableMetaInfo->tagColumnIndex[j] == TSDB_TBNAME_COLUMN_INDEX) { SSchema tbSchema = { .bytes = TSDB_TABLE_NAME_LEN, .colId = TSDB_TBNAME_COLUMN_INDEX, .type = TSDB_DATA_TYPE_BINARY}; memcpy(pMsg, &tbSchema, sizeof(SSchema)); } else { - memcpy(pMsg, &pTagSchema[pMeterMetaInfo->tagColumnIndex[j]], sizeof(SSchema)); + memcpy(pMsg, &pTagSchema[pTableMetaInfo->tagColumnIndex[j]], sizeof(SSchema)); } pMsg += sizeof(SSchema); @@ -1653,7 +922,7 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t numOfBlocks = 0; if (pQueryInfo->tsBuf != NULL) { - STSVnodeBlockInfo *pBlockInfo = tsBufGetVnodeBlockInfo(pQueryInfo->tsBuf, pMeterMetaInfo->vnodeIndex); + STSVnodeBlockInfo *pBlockInfo = tsBufGetVnodeBlockInfo(pQueryInfo->tsBuf, pTableMetaInfo->vnodeIndex); assert(QUERY_IS_JOIN_QUERY(pQueryInfo->type) && pBlockInfo != NULL); // this query should not be sent // todo refactor @@ -1676,7 +945,9 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { tscTrace("%p msg built success,len:%d bytes", pSql, msgLen); pCmd->payloadLen = msgLen; pSql->cmd.msgType = TSDB_MSG_TYPE_QUERY; - + + pQueryMsg->contLen = htonl(msgLen); + assert(msgLen + minMsgSize() <= size); return TSDB_CODE_SUCCESS; @@ -1684,47 +955,47 @@ int tscBuildQueryMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t tscBuildCreateDbMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SCreateDbMsg); - pCmd->msgType = TSDB_MSG_TYPE_CREATE_DB; + pCmd->payloadLen = sizeof(SCMCreateDbMsg); + 
pCmd->msgType = TSDB_MSG_TYPE_CM_CREATE_DB; if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SCreateDbMsg *pCreateDbMsg = (SCreateDbMsg*)pCmd->payload; + SCMCreateDbMsg *pCreateDbMsg = (SCMCreateDbMsg*)pCmd->payload; assert(pCmd->numOfClause == 1); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - strncpy(pCreateDbMsg->db, pMeterMetaInfo->name, tListLen(pCreateDbMsg->db)); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + strncpy(pCreateDbMsg->db, pTableMetaInfo->name, tListLen(pCreateDbMsg->db)); return TSDB_CODE_SUCCESS; } int32_t tscBuildCreateDnodeMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SCreateDnodeMsg); + pCmd->payloadLen = sizeof(SCMCreateDnodeMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SCreateDnodeMsg *pCreate = (SCreateDnodeMsg *)pCmd->payload; + SCMCreateDnodeMsg *pCreate = (SCMCreateDnodeMsg *)pCmd->payload; strncpy(pCreate->ip, pInfo->pDCLInfo->a[0].z, pInfo->pDCLInfo->a[0].n); - pCmd->msgType = TSDB_MSG_TYPE_CREATE_DNODE; + pCmd->msgType = TSDB_MSG_TYPE_CM_CREATE_DNODE; return TSDB_CODE_SUCCESS; } int32_t tscBuildAcctMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SCreateAcctMsg); + pCmd->payloadLen = sizeof(SCMCreateAcctMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SCreateAcctMsg *pAlterMsg = (SCreateAcctMsg *)pCmd->payload; + SCMCreateAcctMsg *pAlterMsg = (SCMCreateAcctMsg *)pCmd->payload; SSQLToken *pName = &pInfo->pDCLInfo->user.user; SSQLToken *pPwd = &pInfo->pDCLInfo->user.passwd; @@ -1757,20 +1028,20 @@ int32_t 
tscBuildAcctMsg(SSqlObj *pSql, SSqlInfo *pInfo) { } } - pCmd->msgType = TSDB_MSG_TYPE_CREATE_ACCT; + pCmd->msgType = TSDB_MSG_TYPE_CM_CREATE_ACCT; return TSDB_CODE_SUCCESS; } int32_t tscBuildUserMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SCreateUserMsg); + pCmd->payloadLen = sizeof(SCMCreateUserMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SCreateUserMsg *pAlterMsg = (SCreateUserMsg*)pCmd->payload; + SCMCreateUserMsg *pAlterMsg = (SCMCreateUserMsg*)pCmd->payload; SUserInfo *pUser = &pInfo->pDCLInfo->user; strncpy(pAlterMsg->user, pUser->user.z, pUser->user.n); @@ -1785,9 +1056,9 @@ int32_t tscBuildUserMsg(SSqlObj *pSql, SSqlInfo *pInfo) { } if (pUser->type == TSDB_ALTER_USER_PASSWD || pUser->type == TSDB_ALTER_USER_PRIVILEGES) { - pCmd->msgType = TSDB_MSG_TYPE_ALTER_USER; + pCmd->msgType = TSDB_MSG_TYPE_CM_ALTER_USER; } else { - pCmd->msgType = TSDB_MSG_TYPE_CREATE_USER; + pCmd->msgType = TSDB_MSG_TYPE_CM_CREATE_USER; } return TSDB_CODE_SUCCESS; @@ -1808,87 +1079,87 @@ int32_t tscBuildCfgDnodeMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t tscBuildDropDbMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SDropDbMsg); + pCmd->payloadLen = sizeof(SCMDropDbMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SDropDbMsg *pDropDbMsg = (SDropDbMsg*)pCmd->payload; + SCMDropDbMsg *pDropDbMsg = (SCMDropDbMsg*)pCmd->payload; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - strncpy(pDropDbMsg->db, pMeterMetaInfo->name, tListLen(pDropDbMsg->db)); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + strncpy(pDropDbMsg->db, pTableMetaInfo->name, tListLen(pDropDbMsg->db)); 
pDropDbMsg->ignoreNotExists = pInfo->pDCLInfo->existsCheck ? 1 : 0; - pCmd->msgType = TSDB_MSG_TYPE_DROP_DB; + pCmd->msgType = TSDB_MSG_TYPE_CM_DROP_DB; return TSDB_CODE_SUCCESS; } int32_t tscBuildDropTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SDropTableMsg); + pCmd->payloadLen = sizeof(SCMDropTableMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SDropTableMsg *pDropTableMsg = (SDropTableMsg*)pCmd->payload; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - strcpy(pDropTableMsg->tableId, pMeterMetaInfo->name); + SCMDropTableMsg *pDropTableMsg = (SCMDropTableMsg*)pCmd->payload; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + strcpy(pDropTableMsg->tableId, pTableMetaInfo->name); pDropTableMsg->igNotExists = pInfo->pDCLInfo->existsCheck ? 1 : 0; - pCmd->msgType = TSDB_MSG_TYPE_DROP_TABLE; + pCmd->msgType = TSDB_MSG_TYPE_CM_DROP_TABLE; return TSDB_CODE_SUCCESS; } int32_t tscBuildDropDnodeMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SDropDnodeMsg); + pCmd->payloadLen = sizeof(SCMDropDnodeMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SDropDnodeMsg *pDrop = (SDropDnodeMsg *)pCmd->payload; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - strcpy(pDrop->ip, pMeterMetaInfo->name); - pCmd->msgType = TSDB_MSG_TYPE_DROP_DNODE; + SCMDropDnodeMsg *pDrop = (SCMDropDnodeMsg *)pCmd->payload; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + strcpy(pDrop->ip, pTableMetaInfo->name); + pCmd->msgType = TSDB_MSG_TYPE_CM_DROP_DNODE; return TSDB_CODE_SUCCESS; } int32_t tscBuildDropAcctMsg(SSqlObj 
*pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SDropUserMsg); - pCmd->msgType = TSDB_MSG_TYPE_DROP_USER; + pCmd->payloadLen = sizeof(SCMDropUserMsg); + pCmd->msgType = TSDB_MSG_TYPE_CM_DROP_USER; if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SDropUserMsg *pDropMsg = (SDropUserMsg*)pCmd->payload; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - strcpy(pDropMsg->user, pMeterMetaInfo->name); + SCMDropUserMsg *pDropMsg = (SCMDropUserMsg*)pCmd->payload; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + strcpy(pDropMsg->user, pTableMetaInfo->name); return TSDB_CODE_SUCCESS; } int32_t tscBuildUseDbMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SUseDbMsg); + pCmd->payloadLen = sizeof(SCMUseDbMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SUseDbMsg *pUseDbMsg = (SUseDbMsg*)pCmd->payload; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - strcpy(pUseDbMsg->db, pMeterMetaInfo->name); - pCmd->msgType = TSDB_MSG_TYPE_USE_DB; + SCMUseDbMsg *pUseDbMsg = (SCMUseDbMsg*)pCmd->payload; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + strcpy(pUseDbMsg->db, pTableMetaInfo->name); + pCmd->msgType = TSDB_MSG_TYPE_CM_USE_DB; return TSDB_CODE_SUCCESS; } @@ -1896,20 +1167,20 @@ int32_t tscBuildUseDbMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int32_t tscBuildShowMsg(SSqlObj *pSql, SSqlInfo *pInfo) { STscObj *pObj = pSql->pTscObj; SSqlCmd *pCmd = &pSql->cmd; - pCmd->msgType = TSDB_MSG_TYPE_SHOW; - pCmd->payloadLen = sizeof(SShowMsg) + 100; + pCmd->msgType = TSDB_MSG_TYPE_CM_SHOW; + pCmd->payloadLen = sizeof(SCMShowMsg) + 100; if 
(TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SShowMsg *pShowMsg = (SShowMsg*)pCmd->payload; + SCMShowMsg *pShowMsg = (SCMShowMsg*)pCmd->payload; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - size_t nameLen = strlen(pMeterMetaInfo->name); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + size_t nameLen = strlen(pTableMetaInfo->name); if (nameLen > 0) { - strcpy(pShowMsg->db, pMeterMetaInfo->name); // prefix is set here + strcpy(pShowMsg->db, pTableMetaInfo->name); // prefix is set here } else { strcpy(pShowMsg->db, pObj->db); } @@ -1931,30 +1202,30 @@ int32_t tscBuildShowMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pShowMsg->payloadLen = htons(pIpAddr->n); } - pCmd->payloadLen = sizeof(SShowMsg) + pShowMsg->payloadLen; + pCmd->payloadLen = sizeof(SCMShowMsg) + pShowMsg->payloadLen; return TSDB_CODE_SUCCESS; } int32_t tscBuildKillMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SKillQueryMsg); + pCmd->payloadLen = sizeof(SCMKillQueryMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SKillQueryMsg *pKill = (SKillQueryMsg*)pCmd->payload; + SCMKillQueryMsg *pKill = (SCMKillQueryMsg*)pCmd->payload; strncpy(pKill->queryId, pInfo->pDCLInfo->ip.z, pInfo->pDCLInfo->ip.n); switch (pCmd->command) { case TSDB_SQL_KILL_QUERY: - pCmd->msgType = TSDB_MSG_TYPE_KILL_QUERY; + pCmd->msgType = TSDB_MSG_TYPE_CM_KILL_QUERY; break; case TSDB_SQL_KILL_CONNECTION: - pCmd->msgType = TSDB_MSG_TYPE_KILL_CONNECTION; + pCmd->msgType = TSDB_MSG_TYPE_CM_KILL_CONN; break; case TSDB_SQL_KILL_STREAM: - pCmd->msgType = TSDB_MSG_TYPE_KILL_STREAM; + pCmd->msgType = TSDB_MSG_TYPE_CM_KILL_STREAM; break; } return TSDB_CODE_SUCCESS; @@ -1963,7 +1234,7 @@ int32_t 
tscBuildKillMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int tscEstimateCreateTableMsgLength(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &(pSql->cmd); - int32_t size = minMsgSize() + sizeof(SMgmtHead) + sizeof(SCreateTableMsg); + int32_t size = minMsgSize() + sizeof(SMgmtHead) + sizeof(SCMCreateTableMsg); SCreateTableSQL *pCreateTableInfo = pInfo->pCreateTableInfo; if (pCreateTableInfo->type == TSQL_CREATE_TABLE_FROM_STABLE) { @@ -1986,7 +1257,7 @@ int tscBuildCreateTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); // Reallocate the payload size size = tscEstimateCreateTableMsgLength(pSql, pInfo); @@ -1996,11 +1267,11 @@ int tscBuildCreateTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { } - SCreateTableMsg *pCreateTableMsg = (SCreateTableMsg *)pCmd->payload; - strcpy(pCreateTableMsg->tableId, pMeterMetaInfo->name); + SCMCreateTableMsg *pCreateTableMsg = (SCMCreateTableMsg *)pCmd->payload; + strcpy(pCreateTableMsg->tableId, pTableMetaInfo->name); // use dbinfo from table id without modifying current db info - tscGetDBInfoFromMeterId(pMeterMetaInfo->name, pCreateTableMsg->db); + tscGetDBInfoFromMeterId(pTableMetaInfo->name, pCreateTableMsg->db); SCreateTableSQL *pCreateTable = pInfo->pCreateTableInfo; @@ -2042,8 +1313,9 @@ int tscBuildCreateTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { tscClearFieldInfo(&pQueryInfo->fieldsInfo); msgLen = pMsg - (char*)pCreateTableMsg; + pCreateTableMsg->contLen = htonl(msgLen); pCmd->payloadLen = msgLen; - pCmd->msgType = TSDB_MSG_TYPE_CREATE_TABLE; + pCmd->msgType = TSDB_MSG_TYPE_CM_CREATE_TABLE; assert(msgLen + minMsgSize() <= size); return TSDB_CODE_SUCCESS; @@ -2051,12 +1323,12 @@ int tscBuildCreateTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int tscEstimateAlterTableMsgLength(SSqlCmd *pCmd) { SQueryInfo *pQueryInfo 
= tscGetQueryInfoDetail(pCmd, 0); - return minMsgSize() + sizeof(SMgmtHead) + sizeof(SAlterTableMsg) + sizeof(SSchema) * tscNumOfFields(pQueryInfo) + + return minMsgSize() + sizeof(SMgmtHead) + sizeof(SCMAlterTableMsg) + sizeof(SSchema) * tscNumOfFields(pQueryInfo) + TSDB_EXTRA_PAYLOAD_SIZE; } int tscBuildAlterTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { - SAlterTableMsg *pAlterTableMsg; + SCMAlterTableMsg *pAlterTableMsg; char * pMsg; int msgLen = 0; int size = 0; @@ -2064,7 +1336,7 @@ int tscBuildAlterTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd * pCmd = &pSql->cmd; SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); size = tscEstimateAlterTableMsgLength(pCmd); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, size)) { @@ -2072,13 +1344,13 @@ int tscBuildAlterTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { return -1; } - pAlterTableMsg = (SAlterTableMsg *)pCmd->payload; + pAlterTableMsg = (SCMAlterTableMsg *)pCmd->payload; - tscGetDBInfoFromMeterId(pMeterMetaInfo->name, pAlterTableMsg->db); + tscGetDBInfoFromMeterId(pTableMetaInfo->name, pAlterTableMsg->db); SAlterTableSQL *pAlterInfo = pInfo->pAlterInfo; - strcpy(pAlterTableMsg->tableId, pMeterMetaInfo->name); + strcpy(pAlterTableMsg->tableId, pTableMetaInfo->name); pAlterTableMsg->type = htons(pAlterInfo->type); pAlterTableMsg->numOfCols = htons(tscNumOfFields(pQueryInfo)); @@ -2098,7 +1370,7 @@ int tscBuildAlterTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { msgLen = pMsg - (char*)pAlterTableMsg; pCmd->payloadLen = msgLen; - pCmd->msgType = TSDB_MSG_TYPE_ALTER_TABLE; + pCmd->msgType = TSDB_MSG_TYPE_CM_ALTER_TABLE; assert(msgLen + minMsgSize() <= size); @@ -2107,17 +1379,17 @@ int tscBuildAlterTableMsg(SSqlObj *pSql, SSqlInfo *pInfo) { int tscAlterDbMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd *pCmd = &pSql->cmd; - pCmd->payloadLen = sizeof(SAlterDbMsg); - 
pCmd->msgType = TSDB_MSG_TYPE_ALTER_DB; + pCmd->payloadLen = sizeof(SCMAlterDbMsg); + pCmd->msgType = TSDB_MSG_TYPE_CM_ALTER_DB; if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SAlterDbMsg *pAlterDbMsg = (SAlterDbMsg*)pCmd->payload; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - strcpy(pAlterDbMsg->db, pMeterMetaInfo->name); + SCMAlterDbMsg *pAlterDbMsg = (SCMAlterDbMsg*)pCmd->payload; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + strcpy(pAlterDbMsg->db, pTableMetaInfo->name); return TSDB_CODE_SUCCESS; } @@ -2191,10 +1463,11 @@ static int tscLocalResultCommonBuilder(SSqlObj *pSql, int32_t numOfRes) { int tscProcessDescribeTableRsp(SSqlObj *pSql) { SSqlCmd * pCmd = &pSql->cmd; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); - - int32_t numOfRes = pMeterMetaInfo->pMeterMeta->numOfColumns + pMeterMetaInfo->pMeterMeta->numOfTags; + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + + int32_t numOfRes = tinfo.numOfColumns + tinfo.numOfTags; return tscLocalResultCommonBuilder(pSql, numOfRes); } @@ -2202,11 +1475,11 @@ int tscProcessTagRetrieveRsp(SSqlObj *pSql) { SSqlCmd *pCmd = &pSql->cmd; SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); int32_t numOfRes = 0; if (tscSqlExprGet(pQueryInfo, 0)->functionId == TSDB_FUNC_TAGPRJ) { - numOfRes = pMeterMetaInfo->pMetricMeta->numOfTables; + numOfRes = pTableMetaInfo->pMetricMeta->numOfTables; } else { numOfRes = 1; // for count function, there is only one output. 
} @@ -2243,15 +1516,15 @@ int tscProcessEmptyResultRsp(SSqlObj *pSql) { return tscLocalResultCommonBuilder int tscBuildConnectMsg(SSqlObj *pSql, SSqlInfo *pInfo) { STscObj *pObj = pSql->pTscObj; SSqlCmd *pCmd = &pSql->cmd; - pCmd->msgType = TSDB_MSG_TYPE_CONNECT; - pCmd->payloadLen = sizeof(SConnectMsg); + pCmd->msgType = TSDB_MSG_TYPE_CM_CONNECT; + pCmd->payloadLen = sizeof(SCMConnectMsg); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, pCmd->payloadLen)) { tscError("%p failed to malloc for query msg", pSql); return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SConnectMsg *pConnect = (SConnectMsg*)pCmd->payload; + SCMConnectMsg *pConnect = (SCMConnectMsg*)pCmd->payload; char *db; // ugly code to move the space db = strstr(pObj->db, TS_PATH_DELIMITER); @@ -2263,8 +1536,8 @@ int tscBuildConnectMsg(SSqlObj *pSql, SSqlInfo *pInfo) { return TSDB_CODE_SUCCESS; } -int tscBuildMeterMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { - STableInfoMsg *pInfoMsg; +int tscBuildTableMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { + SCMTableInfoMsg *pInfoMsg; char * pMsg; int msgLen = 0; @@ -2282,22 +1555,22 @@ int tscBuildMeterMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SSqlCmd * pCmd = &pSql->cmd; SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - pInfoMsg = (STableInfoMsg *)pCmd->payload; - strcpy(pInfoMsg->tableId, pMeterMetaInfo->name); - pInfoMsg->createFlag = htons(pSql->cmd.createOnDemand ? 1 : 0); + pInfoMsg = (SCMTableInfoMsg *)pCmd->payload; + strcpy(pInfoMsg->tableId, pTableMetaInfo->name); + pInfoMsg->createFlag = htons(pSql->cmd.autoCreated ? 
1 : 0); - pMsg = (char*)pInfoMsg + sizeof(STableInfoMsg); + pMsg = (char*)pInfoMsg + sizeof(SCMTableInfoMsg); - if (pSql->cmd.createOnDemand) { + if (pSql->cmd.autoCreated) { memcpy(pInfoMsg->tags, tmpData, sizeof(STagData)); pMsg += sizeof(STagData); } msgLen = pMsg - (char*)pInfoMsg; pCmd->payloadLen = msgLen; - pCmd->msgType = TSDB_MSG_TYPE_TABLE_META; + pCmd->msgType = TSDB_MSG_TYPE_CM_TABLE_META; tfree(tmpData); @@ -2307,7 +1580,7 @@ int tscBuildMeterMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { /** * multi meter meta req pkg format: - * | SMgmtHead | SMultiTableInfoMsg | tableId0 | tableId1 | tableId2 | ...... + * | SMgmtHead | SCMMultiTableInfoMsg | tableId0 | tableId1 | tableId2 | ...... * no used 4B **/ int tscBuildMultiMeterMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { @@ -2325,7 +1598,7 @@ int tscBuildMultiMeterMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { SMgmtHead *pMgmt = (SMgmtHead *)(pCmd->payload + tsRpcHeadSize); memset(pMgmt->db, 0, TSDB_TABLE_ID_LEN); // server don't need the db - SMultiTableInfoMsg *pInfoMsg = (SMultiTableInfoMsg *)(pCmd->payload + tsRpcHeadSize + sizeof(SMgmtHead)); + SCMMultiTableInfoMsg *pInfoMsg = (SCMMultiTableInfoMsg *)(pCmd->payload + tsRpcHeadSize + sizeof(SMgmtHead)); pInfoMsg->numOfTables = htonl((int32_t)pCmd->count); if (pCmd->payloadLen > 0) { @@ -2334,8 +1607,8 @@ int tscBuildMultiMeterMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { tfree(tmpData); - pCmd->payloadLen += sizeof(SMgmtHead) + sizeof(SMultiTableInfoMsg); - pCmd->msgType = TSDB_MSG_TYPE_MULTI_TABLE_META; + pCmd->payloadLen += sizeof(SMgmtHead) + sizeof(SCMMultiTableInfoMsg); + pCmd->msgType = TSDB_MSG_TYPE_CM_TABLES_META; assert(pCmd->payloadLen + minMsgSize() <= pCmd->allocSize); @@ -2381,7 +1654,7 @@ int tscBuildMetricMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { STagCond *pTagCond = &pQueryInfo->tagCond; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); 
int32_t size = tscEstimateMetricMetaMsgSize(pCmd); if (TSDB_CODE_SUCCESS != tscAllocPayload(pCmd, size)) { @@ -2393,7 +1666,7 @@ int tscBuildMetricMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pMsg = pStart; SMgmtHead *pMgmt = (SMgmtHead *)pMsg; - tscGetDBInfoFromMeterId(pMeterMetaInfo->name, pMgmt->db); + tscGetDBInfoFromMeterId(pTableMetaInfo->name, pMgmt->db); pMsg += sizeof(SMgmtHead); @@ -2421,8 +1694,8 @@ int tscBuildMetricMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pMsg += sizeof(int16_t); for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { - pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, i); - uint64_t uid = pMeterMetaInfo->pMeterMeta->uid; + pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, i); + uint64_t uid = pTableMetaInfo->pTableMeta->uid; offset = pMsg - (char *)pMetaMsg; pMetaMsg->metaElem[i] = htonl(offset); @@ -2475,8 +1748,8 @@ int tscBuildMetricMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { pElem->numOfGroupCols = 0; } else { pElem->numOfGroupCols = htons(pGroupby->numOfGroupCols); - for (int32_t j = 0; j < pMeterMetaInfo->numOfTags; ++j) { - pElem->tagCols[j] = htons(pMeterMetaInfo->tagColumnIndex[j]); + for (int32_t j = 0; j < pTableMetaInfo->numOfTags; ++j) { + pElem->tagCols[j] = htons(pTableMetaInfo->tagColumnIndex[j]); } if (pGroupby->numOfGroupCols != 0) { @@ -2500,8 +1773,8 @@ int tscBuildMetricMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { } } - strcpy(pElem->tableId, pMeterMetaInfo->name); - pElem->numOfTags = htons(pMeterMetaInfo->numOfTags); + strcpy(pElem->tableId, pTableMetaInfo->name); + pElem->numOfTags = htons(pTableMetaInfo->numOfTags); int16_t len = pMsg - (char *)pElem; pElem->elemLen = htons(len); // redundant data for integrate check @@ -2509,7 +1782,7 @@ int tscBuildMetricMetaMsg(SSqlObj *pSql, SSqlInfo *pInfo) { msgLen = pMsg - pStart; pCmd->payloadLen = msgLen; - pCmd->msgType = TSDB_MSG_TYPE_STABLE_META; + pCmd->msgType = TSDB_MSG_TYPE_CM_STABLE_META; assert(msgLen + minMsgSize() <= size); return 
TSDB_CODE_SUCCESS; @@ -2566,96 +1839,90 @@ int tscBuildHeartBeatMsg(SSqlObj *pSql, SSqlInfo *pInfo) { msgLen = pMsg - pStart; pCmd->payloadLen = msgLen; - pCmd->msgType = TSDB_MSG_TYPE_HEARTBEAT; + pCmd->msgType = TSDB_MSG_TYPE_CM_HEARTBEAT; assert(msgLen + minMsgSize() <= size); return msgLen; } -int tscProcessMeterMetaRsp(SSqlObj *pSql) { - STableMeta *pMeta; - SSchema * pSchema; - - pMeta = (STableMeta *)pSql->res.pRsp; +int tscProcessTableMetaRsp(SSqlObj *pSql) { + STableMetaMsg *pMetaMsg = (STableMetaMsg *)pSql->res.pRsp; - pMeta->sid = htonl(pMeta->sid); - pMeta->sversion = htons(pMeta->sversion); - pMeta->vgid = htonl(pMeta->vgid); - pMeta->uid = htobe64(pMeta->uid); - pMeta->contLen = htons(pMeta->contLen); + pMetaMsg->sid = htonl(pMetaMsg->sid); + pMetaMsg->sversion = htons(pMetaMsg->sversion); + pMetaMsg->vgId = htonl(pMetaMsg->vgId); + pMetaMsg->uid = htobe64(pMetaMsg->uid); + pMetaMsg->contLen = htons(pMetaMsg->contLen); - if (pMeta->sid < 0 || pMeta->vgid < 0) { - tscError("invalid meter vgid:%d, sid%d", pMeta->vgid, pMeta->sid); + if (pMetaMsg->sid < 0 || pMetaMsg->vgId < 0) { + tscError("invalid meter vgId:%d, sid%d", pMetaMsg->vgId, pMetaMsg->sid); return TSDB_CODE_INVALID_VALUE; } - pMeta->numOfColumns = htons(pMeta->numOfColumns); + pMetaMsg->numOfColumns = htons(pMetaMsg->numOfColumns); - if (pMeta->numOfTags > TSDB_MAX_TAGS || pMeta->numOfTags < 0) { - tscError("invalid numOfTags:%d", pMeta->numOfTags); + if (pMetaMsg->numOfTags > TSDB_MAX_TAGS || pMetaMsg->numOfTags < 0) { + tscError("invalid numOfTags:%d", pMetaMsg->numOfTags); return TSDB_CODE_INVALID_VALUE; } - if (pMeta->numOfColumns > TSDB_MAX_COLUMNS || pMeta->numOfColumns <= 0) { - tscError("invalid numOfColumns:%d", pMeta->numOfColumns); + if (pMetaMsg->numOfColumns > TSDB_MAX_COLUMNS || pMetaMsg->numOfColumns <= 0) { + tscError("invalid numOfColumns:%d", pMetaMsg->numOfColumns); return TSDB_CODE_INVALID_VALUE; } for (int i = 0; i < TSDB_VNODES_SUPPORT; ++i) { - 
pMeta->vpeerDesc[i].vnode = htonl(pMeta->vpeerDesc[i].vnode); + pMetaMsg->vpeerDesc[i].vnode = htonl(pMetaMsg->vpeerDesc[i].vnode); } - pMeta->rowSize = 0; - pSchema = (SSchema *)(pSql->res.pRsp + sizeof(STableMeta)); + SSchema* pSchema = pMetaMsg->schema; - int32_t numOfTotalCols = pMeta->numOfColumns + pMeta->numOfTags; + int32_t numOfTotalCols = pMetaMsg->numOfColumns + pMetaMsg->numOfTags; for (int i = 0; i < numOfTotalCols; ++i) { pSchema->bytes = htons(pSchema->bytes); pSchema->colId = htons(pSchema->colId); - - // ignore the tags length - if (i < pMeta->numOfColumns) { - pMeta->rowSize += pSchema->bytes; - } pSchema++; } // rsp += numOfTotalCols * sizeof(SSchema); // // int32_t tagLen = 0; -// SSchema *pTagsSchema = tsGetTagSchema(pMeta); +// SSchema *pTagsSchema = tscGetTableTagSchema(pMetaMsg); // -// if (pMeta->tableType == TSDB_TABLE_TYPE_CHILD_TABLE) { -// for (int32_t i = 0; i < pMeta->numOfTags; ++i) { +// if (pMetaMsg->tableType == TSDB_CHILD_TABLE) { +// for (int32_t i = 0; i < pMetaMsg->numOfTags; ++i) { // tagLen += pTagsSchema[i].bytes; // } // } // // rsp += tagLen; -// int32_t size = (int32_t)(rsp - (char *)pMeta); +// int32_t size = (int32_t)(rsp - (char *)pMetaMsg); - // pMeta->index = rand() % TSDB_VNODES_SUPPORT; -// pMeta->index = 0; + size_t size = 0; + STableMeta* pTableMeta = tscCreateTableMetaFromMsg(pMetaMsg, &size); // todo add one more function: taosAddDataIfNotExists(); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); - assert(pMeterMetaInfo->pMeterMeta == NULL); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); + assert(pTableMetaInfo->pTableMeta == NULL); - pMeterMetaInfo->pMeterMeta = (STableMeta *)taosAddDataIntoCache(tscCacheHandle, pMeterMetaInfo->name, (char *)pMeta, - pMeta->contLen, tsMeterMetaKeepTimer); + pTableMetaInfo->pTableMeta = + (STableMeta *) taosCachePut(tscCacheHandle, pTableMetaInfo->name, pTableMeta, size, tsMeterMetaKeepTimer); + // todo handle out of 
memory case - if (pMeterMetaInfo->pMeterMeta == NULL) return 0; + if (pTableMetaInfo->pTableMeta == NULL) { + return TSDB_CODE_CLI_OUT_OF_MEMORY; + } - return TSDB_CODE_OTHERS; + free(pTableMeta); + return TSDB_CODE_SUCCESS; } /** * multi meter meta rsp pkg format: - * | STaosRsp | ieType | SMultiTableInfoMsg | SMeterMeta0 | SSchema0 | SMeterMeta1 | SSchema1 | SMeterMeta2 | SSchema2 + * | STaosRsp | ieType | SCMMultiTableInfoMsg | SMeterMeta0 | SSchema0 | SMeterMeta1 | SSchema1 | SMeterMeta2 | SSchema2 * |...... 1B 1B 4B **/ int tscProcessMultiMeterMetaRsp(SSqlObj *pSql) { - SSchema *pSchema; uint8_t ieType; int32_t totalNum; int32_t i; @@ -2672,9 +1939,9 @@ int tscProcessMultiMeterMetaRsp(SSqlObj *pSql) { rsp++; - SMultiTableInfoMsg *pInfo = (SMultiTableInfoMsg *)rsp; + SCMMultiTableInfoMsg *pInfo = (SCMMultiTableInfoMsg *)rsp; totalNum = htonl(pInfo->numOfTables); - rsp += sizeof(SMultiTableInfoMsg); + rsp += sizeof(SCMMultiTableInfoMsg); for (i = 0; i < totalNum; i++) { SMultiTableMeta *pMultiMeta = (SMultiTableMeta *)rsp; @@ -2682,77 +1949,78 @@ int tscProcessMultiMeterMetaRsp(SSqlObj *pSql) { pMeta->sid = htonl(pMeta->sid); pMeta->sversion = htons(pMeta->sversion); - pMeta->vgid = htonl(pMeta->vgid); + pMeta->vgId = htonl(pMeta->vgId); pMeta->uid = htobe64(pMeta->uid); - if (pMeta->sid <= 0 || pMeta->vgid < 0) { - tscError("invalid meter vgid:%d, sid%d", pMeta->vgid, pMeta->sid); - pSql->res.code = TSDB_CODE_INVALID_VALUE; - pSql->res.numOfTotal = i; - return TSDB_CODE_OTHERS; - } - - pMeta->numOfColumns = htons(pMeta->numOfColumns); - - if (pMeta->numOfTags > TSDB_MAX_TAGS || pMeta->numOfTags < 0) { - tscError("invalid tag value count:%d", pMeta->numOfTags); - pSql->res.code = TSDB_CODE_INVALID_VALUE; - pSql->res.numOfTotal = i; - return TSDB_CODE_OTHERS; - } - - if (pMeta->numOfTags > TSDB_MAX_TAGS || pMeta->numOfTags < 0) { - tscError("invalid numOfTags:%d", pMeta->numOfTags); - pSql->res.code = TSDB_CODE_INVALID_VALUE; - pSql->res.numOfTotal = i; - return 
TSDB_CODE_OTHERS; - } - - if (pMeta->numOfColumns > TSDB_MAX_COLUMNS || pMeta->numOfColumns < 0) { - tscError("invalid numOfColumns:%d", pMeta->numOfColumns); + if (pMeta->sid <= 0 || pMeta->vgId < 0) { + tscError("invalid meter vgId:%d, sid%d", pMeta->vgId, pMeta->sid); pSql->res.code = TSDB_CODE_INVALID_VALUE; pSql->res.numOfTotal = i; return TSDB_CODE_OTHERS; } - for (int j = 0; j < TSDB_VNODES_SUPPORT; ++j) { - pMeta->vpeerDesc[j].vnode = htonl(pMeta->vpeerDesc[j].vnode); - } - - pMeta->rowSize = 0; - rsp += sizeof(SMultiTableMeta); - pSchema = (SSchema *)rsp; - - int32_t numOfTotalCols = pMeta->numOfColumns + pMeta->numOfTags; - for (int j = 0; j < numOfTotalCols; ++j) { - pSchema->bytes = htons(pSchema->bytes); - pSchema->colId = htons(pSchema->colId); - - // ignore the tags length - if (j < pMeta->numOfColumns) { - pMeta->rowSize += pSchema->bytes; - } - pSchema++; - } - - rsp += numOfTotalCols * sizeof(SSchema); - - int32_t tagLen = 0; - SSchema *pTagsSchema = tsGetTagSchema(pMeta); - - if (pMeta->tableType == TSDB_TABLE_TYPE_CHILD_TABLE) { - for (int32_t j = 0; j < pMeta->numOfTags; ++j) { - tagLen += pTagsSchema[j].bytes; - } - } - - rsp += tagLen; - int32_t size = (int32_t)(rsp - ((char *)pMeta)); // Consistent with STableMeta in cache - - pMeta->index = 0; - (void)taosAddDataIntoCache(tscCacheHandle, pMeta->tableId, (char *)pMeta, size, tsMeterMetaKeepTimer); + // pMeta->numOfColumns = htons(pMeta->numOfColumns); + // + // if (pMeta->numOfTags > TSDB_MAX_TAGS || pMeta->numOfTags < 0) { + // tscError("invalid tag value count:%d", pMeta->numOfTags); + // pSql->res.code = TSDB_CODE_INVALID_VALUE; + // pSql->res.numOfTotal = i; + // return TSDB_CODE_OTHERS; + // } + // + // if (pMeta->numOfTags > TSDB_MAX_TAGS || pMeta->numOfTags < 0) { + // tscError("invalid numOfTags:%d", pMeta->numOfTags); + // pSql->res.code = TSDB_CODE_INVALID_VALUE; + // pSql->res.numOfTotal = i; + // return TSDB_CODE_OTHERS; + // } + // + // if (pMeta->numOfColumns > TSDB_MAX_COLUMNS 
|| pMeta->numOfColumns < 0) { + // tscError("invalid numOfColumns:%d", pMeta->numOfColumns); + // pSql->res.code = TSDB_CODE_INVALID_VALUE; + // pSql->res.numOfTotal = i; + // return TSDB_CODE_OTHERS; + // } + // + // for (int j = 0; j < TSDB_VNODES_SUPPORT; ++j) { + // pMeta->vpeerDesc[j].vnode = htonl(pMeta->vpeerDesc[j].vnode); + // } + // + // pMeta->rowSize = 0; + // rsp += sizeof(SMultiTableMeta); + // pSchema = (SSchema *)rsp; + // + // int32_t numOfTotalCols = pMeta->numOfColumns + pMeta->numOfTags; + // for (int j = 0; j < numOfTotalCols; ++j) { + // pSchema->bytes = htons(pSchema->bytes); + // pSchema->colId = htons(pSchema->colId); + // + // // ignore the tags length + // if (j < pMeta->numOfColumns) { + // pMeta->rowSize += pSchema->bytes; + // } + // pSchema++; + // } + // + // rsp += numOfTotalCols * sizeof(SSchema); + // + // int32_t tagLen = 0; + // SSchema *pTagsSchema = tscGetTableTagSchema(pMeta); + // + // if (pMeta->tableType == TSDB_CHILD_TABLE) { + // for (int32_t j = 0; j < pMeta->numOfTags; ++j) { + // tagLen += pTagsSchema[j].bytes; + // } + // } + // + // rsp += tagLen; + // int32_t size = (int32_t)(rsp - ((char *)pMeta)); // Consistent with STableMeta in cache + // + // pMeta->index = 0; + // (void)taosCachePut(tscCacheHandle, pMeta->tableId, (char *)pMeta, size, tsMeterMetaKeepTimer); + // } } - + pSql->res.code = TSDB_CODE_SUCCESS; pSql->res.numOfTotal = i; tscTrace("%p load multi-metermeta resp complete num:%d", pSql, pSql->res.numOfTotal); @@ -2800,7 +2068,7 @@ int tscProcessMetricMetaRsp(SSqlObj *pSql) { pMeta->numOfVnodes = htonl(pMeta->numOfVnodes); pMeta->tagLen = htons(pMeta->tagLen); - size += pMeta->numOfVnodes * sizeof(SVnodeSidList *) + pMeta->numOfTables * sizeof(STableSidExtInfo *); + size += pMeta->numOfVnodes * sizeof(SVnodeSidList *) + pMeta->numOfTables * sizeof(STableIdInfo *); char *pBuf = calloc(1, size); if (pBuf == NULL) { @@ -2826,16 +2094,16 @@ int tscProcessMetricMetaRsp(SSqlObj *pSql) { tscTrace("%p 
metricmeta:vid:%d,numOfTables:%d", pSql, i, pLists->numOfSids); - pBuf += sizeof(SVnodeSidList) + sizeof(STableSidExtInfo *) * pSidLists->numOfSids; + pBuf += sizeof(SVnodeSidList) + sizeof(STableIdInfo *) * pSidLists->numOfSids; rsp += sizeof(SVnodeSidList); - size_t elemSize = sizeof(STableSidExtInfo) + pNewMetricMeta->tagLen; + size_t elemSize = sizeof(STableIdInfo) + pNewMetricMeta->tagLen; for (int32_t j = 0; j < pSidLists->numOfSids; ++j) { pLists->pSidExtInfoList[j] = pBuf - (char *)pLists; memcpy(pBuf, rsp, elemSize); - ((STableSidExtInfo *)pBuf)->uid = htobe64(((STableSidExtInfo *)pBuf)->uid); - ((STableSidExtInfo *)pBuf)->sid = htonl(((STableSidExtInfo *)pBuf)->sid); + ((STableIdInfo *)pBuf)->uid = htobe64(((STableIdInfo *)pBuf)->uid); + ((STableIdInfo *)pBuf)->sid = htonl(((STableIdInfo *)pBuf)->sid); rsp += elemSize; pBuf += elemSize; @@ -2849,22 +2117,22 @@ int tscProcessMetricMetaRsp(SSqlObj *pSql) { for (int32_t i = 0; i < num; ++i) { char name[TSDB_MAX_TAGS_LEN + 1] = {0}; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); - tscGetMetricMetaCacheKey(pQueryInfo, name, pMeterMetaInfo->pMeterMeta->uid); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, i); + tscGetMetricMetaCacheKey(pQueryInfo, name, pTableMetaInfo->pTableMeta->uid); #ifdef _DEBUG_VIEW printf("generate the metric key:%s, index:%d\n", name, i); #endif // release the used metricmeta - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMetricMeta), false); + taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pMetricMeta), false); - pMeterMetaInfo->pMetricMeta = (SSuperTableMeta *)taosAddDataIntoCache(tscCacheHandle, name, (char *)metricMetaList[i], + pTableMetaInfo->pMetricMeta = (SSuperTableMeta *)taosCachePut(tscCacheHandle, name, (char *)metricMetaList[i], sizes[i], tsMetricMetaKeepTimer); tfree(metricMetaList[i]); // failed to put into cache - if (pMeterMetaInfo->pMetricMeta == NULL) { + if 
(pTableMetaInfo->pMetricMeta == NULL) { pSql->res.code = TSDB_CODE_CLI_OUT_OF_MEMORY; goto _error_clean; } @@ -2886,30 +2154,30 @@ _error_clean: * current process do not use the cache at all */ int tscProcessShowRsp(SSqlObj *pSql) { - STableMeta * pMeta; - SShowRsp *pShow; + STableMetaMsg * pMetaMsg; + SCMShowRsp *pShow; SSchema * pSchema; char key[20]; SSqlRes *pRes = &pSql->res; SSqlCmd *pCmd = &pSql->cmd; - SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); //? + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - pShow = (SShowRsp *)pRes->pRsp; + pShow = (SCMShowRsp *)pRes->pRsp; pShow->qhandle = htobe64(pShow->qhandle); pRes->qhandle = pShow->qhandle; tscResetForNextRetrieve(pRes); - pMeta = &(pShow->tableMeta); + pMetaMsg = &(pShow->tableMeta); - pMeta->numOfColumns = ntohs(pMeta->numOfColumns); + pMetaMsg->numOfColumns = ntohs(pMetaMsg->numOfColumns); - pSchema = (SSchema *)((char *)pMeta + sizeof(STableMeta)); - pMeta->sid = ntohs(pMeta->sid); - for (int i = 0; i < pMeta->numOfColumns; ++i) { + pSchema = pMetaMsg->schema; + pMetaMsg->sid = ntohs(pMetaMsg->sid); + for (int i = 0; i < pMetaMsg->numOfColumns; ++i) { pSchema->bytes = htons(pSchema->bytes); pSchema++; } @@ -2917,27 +2185,31 @@ int tscProcessShowRsp(SSqlObj *pSql) { key[0] = pCmd->msgType + 'a'; strcpy(key + 1, "showlist"); - taosRemoveDataFromCache(tscCacheHandle, (void *)&(pMeterMetaInfo->pMeterMeta), false); - - int32_t size = pMeta->numOfColumns * sizeof(SSchema) + sizeof(STableMeta); - pMeterMetaInfo->pMeterMeta = - (STableMeta *)taosAddDataIntoCache(tscCacheHandle, key, (char *)pMeta, size, tsMeterMetaKeepTimer); + taosCacheRelease(tscCacheHandle, (void *)&(pTableMetaInfo->pTableMeta), false); + size_t size = 0; + STableMeta* pTableMeta = tscCreateTableMetaFromMsg(pMetaMsg, &size); + + pTableMetaInfo->pTableMeta = + (STableMeta 
*)taosCachePut(tscCacheHandle, key, (char *)pTableMeta, size, tsMeterMetaKeepTimer); + pCmd->numOfCols = pQueryInfo->fieldsInfo.numOfOutputCols; - SSchema *pMeterSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + SSchema *pTableSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); - tscColumnBaseInfoReserve(&pQueryInfo->colList, pMeta->numOfColumns); + tscColumnBaseInfoReserve(&pQueryInfo->colList, pMetaMsg->numOfColumns); SColumnIndex index = {0}; - for (int16_t i = 0; i < pMeta->numOfColumns; ++i) { + for (int16_t i = 0; i < pMetaMsg->numOfColumns; ++i) { index.columnIndex = i; tscColumnBaseInfoInsert(pQueryInfo, &index); - tscFieldInfoSetValFromSchema(&pQueryInfo->fieldsInfo, i, &pMeterSchema[i]); + tscFieldInfoSetValFromSchema(&pQueryInfo->fieldsInfo, i, &pTableSchema[i]); pQueryInfo->fieldsInfo.pSqlExpr[i] = tscSqlExprInsert(pQueryInfo, i, TSDB_FUNC_TS_DUMMY, &index, - pMeterSchema[i].type, pMeterSchema[i].bytes, pMeterSchema[i].bytes); + pTableSchema[i].type, pTableSchema[i].bytes, pTableSchema[i].bytes); } tscFieldInfoCalOffset(pQueryInfo); + + tfree(pTableMeta); return 0; } @@ -2946,7 +2218,7 @@ int tscProcessConnectRsp(SSqlObj *pSql) { STscObj *pObj = pSql->pTscObj; SSqlRes *pRes = &pSql->res; - SConnectRsp *pConnect = (SConnectRsp *)pRes->pRsp; + SCMConnectRsp *pConnect = (SCMConnectRsp *)pRes->pRsp; strcpy(pObj->acctId, pConnect->acctId); // copy acctId from response int32_t len = sprintf(temp, "%s%s%s", pObj->acctId, TS_PATH_DELIMITER, pObj->db); @@ -2954,7 +2226,7 @@ int tscProcessConnectRsp(SSqlObj *pSql) { strncpy(pObj->db, temp, tListLen(pObj->db)); // SIpList * pIpList; -// char *rsp = pRes->pRsp + sizeof(SConnectRsp); +// char *rsp = pRes->pRsp + sizeof(SCMConnectRsp); // pIpList = (SIpList *)rsp; // tscSetMgmtIpList(pIpList); @@ -2968,22 +2240,22 @@ int tscProcessConnectRsp(SSqlObj *pSql) { int tscProcessUseDbRsp(SSqlObj *pSql) { STscObj * pObj = pSql->pTscObj; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); + 
STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); - strcpy(pObj->db, pMeterMetaInfo->name); + strcpy(pObj->db, pTableMetaInfo->name); return 0; } int tscProcessDropDbRsp(SSqlObj *UNUSED_PARAM(pSql)) { - taosClearDataCache(tscCacheHandle); + taosCacheEmpty(tscCacheHandle); return 0; } int tscProcessDropTableRsp(SSqlObj *pSql) { - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); - STableMeta *pMeterMeta = taosGetDataFromCache(tscCacheHandle, pMeterMetaInfo->name); - if (pMeterMeta == NULL) { + STableMeta *pTableMeta = taosCacheAcquireByName(tscCacheHandle, pTableMetaInfo->name); + if (pTableMeta == NULL) { /* not in cache, abort */ return 0; } @@ -2995,37 +2267,37 @@ int tscProcessDropTableRsp(SSqlObj *pSql) { * The cached information is expired, however, we may have lost the ref of original meter. So, clear whole cache * instead. */ - tscTrace("%p force release metermeta after drop table:%s", pSql, pMeterMetaInfo->name); - taosRemoveDataFromCache(tscCacheHandle, (void **)&pMeterMeta, true); + tscTrace("%p force release metermeta after drop table:%s", pSql, pTableMetaInfo->name); + taosCacheRelease(tscCacheHandle, (void **)&pTableMeta, true); - if (pMeterMetaInfo->pMeterMeta) { - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMeterMeta), true); - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMetricMeta), true); + if (pTableMetaInfo->pTableMeta) { + taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pTableMeta), true); + taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pMetricMeta), true); } return 0; } int tscProcessAlterTableMsgRsp(SSqlObj *pSql) { - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); - STableMeta *pMeterMeta = taosGetDataFromCache(tscCacheHandle, 
pMeterMetaInfo->name); - if (pMeterMeta == NULL) { /* not in cache, abort */ + STableMeta *pTableMeta = taosCacheAcquireByName(tscCacheHandle, pTableMetaInfo->name); + if (pTableMeta == NULL) { /* not in cache, abort */ return 0; } - tscTrace("%p force release metermeta in cache after alter-table: %s", pSql, pMeterMetaInfo->name); - taosRemoveDataFromCache(tscCacheHandle, (void **)&pMeterMeta, true); + tscTrace("%p force release metermeta in cache after alter-table: %s", pSql, pTableMetaInfo->name); + taosCacheRelease(tscCacheHandle, (void **)&pTableMeta, true); - if (pMeterMetaInfo->pMeterMeta) { - bool isSuperTable = UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo); + if (pTableMetaInfo->pTableMeta) { + bool isSuperTable = UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo); - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMeterMeta), true); - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMetricMeta), true); + taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pTableMeta), true); + taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pMetricMeta), true); if (isSuperTable) { // if it is a super table, reset whole query cache - tscTrace("%p reset query cache since table:%s is stable", pSql, pMeterMetaInfo->name); - taosClearDataCache(tscCacheHandle); + tscTrace("%p reset query cache since table:%s is stable", pSql, pTableMetaInfo->name); + taosCacheEmpty(tscCacheHandle); } } @@ -3105,11 +2377,9 @@ int tscProcessRetrieveRspFromLocal(SSqlObj *pSql) { return 0; } -void tscMeterMetaCallBack(void *param, TAOS_RES *res, int code); - -static int32_t doGetMeterMetaFromServer(SSqlObj *pSql, SMeterMetaInfo *pMeterMetaInfo) { - int32_t code = TSDB_CODE_SUCCESS; +void tscTableMetaCallBack(void *param, TAOS_RES *res, int code); +static int32_t doGetMeterMetaFromServer(SSqlObj *pSql, STableMetaInfo *pTableMetaInfo) { SSqlObj *pNew = calloc(1, sizeof(SSqlObj)); if (NULL == pNew) { tscError("%p malloc failed for new sqlobj to get meter 
meta", pSql); @@ -3125,7 +2395,7 @@ static int32_t doGetMeterMetaFromServer(SSqlObj *pSql, SMeterMetaInfo *pMeterMet SQueryInfo *pNewQueryInfo = NULL; tscGetQueryInfoDetailSafely(&pNew->cmd, 0, &pNewQueryInfo); - pNew->cmd.createOnDemand = pSql->cmd.createOnDemand; // create table if not exists + pNew->cmd.autoCreated = pSql->cmd.autoCreated; // create table if not exists if (TSDB_CODE_SUCCESS != tscAllocPayload(&pNew->cmd, TSDB_DEFAULT_PAYLOAD_SIZE)) { tscError("%p malloc failed for payload to get meter meta", pSql); free(pNew); @@ -3133,73 +2403,51 @@ static int32_t doGetMeterMetaFromServer(SSqlObj *pSql, SMeterMetaInfo *pMeterMet return TSDB_CODE_CLI_OUT_OF_MEMORY; } - SMeterMetaInfo *pNewMeterMetaInfo = tscAddEmptyMeterMetaInfo(pNewQueryInfo); + STableMetaInfo *pNewMeterMetaInfo = tscAddEmptyMetaInfo(pNewQueryInfo); assert(pNew->cmd.numOfClause == 1 && pNewQueryInfo->numOfTables == 1); - strcpy(pNewMeterMetaInfo->name, pMeterMetaInfo->name); + strcpy(pNewMeterMetaInfo->name, pTableMetaInfo->name); memcpy(pNew->cmd.payload, pSql->cmd.payload, TSDB_DEFAULT_PAYLOAD_SIZE); // tag information if table does not exists. tscTrace("%p new pSqlObj:%p to get tableMeta", pSql, pNew); - if (pSql->fp == NULL) { - tsem_init(&pNew->rspSem, 0, 0); - tsem_init(&pNew->emptyRspSem, 0, 1); - - code = tscProcessSql(pNew); - - /* - * Update cache only on succeeding in getting metermeta. 
- * Transfer the ownership of metermeta to the new object, instead of invoking the release/acquire routine - */ - if (code == TSDB_CODE_SUCCESS) { - pMeterMetaInfo->pMeterMeta = taosTransferDataInCache(tscCacheHandle, (void**) &pNewMeterMetaInfo->pMeterMeta); - assert(pMeterMetaInfo->pMeterMeta != NULL); - } - - tscTrace("%p get meter meta complete, code:%d, pMeterMeta:%p", pSql, code, pMeterMetaInfo->pMeterMeta); - tscFreeSqlObj(pNew); - - } else { - pNew->fp = tscMeterMetaCallBack; - pNew->param = pSql; - pNew->sqlstr = strdup(pSql->sqlstr); + pNew->fp = tscTableMetaCallBack; + pNew->param = pSql; - code = tscProcessSql(pNew); - if (code == TSDB_CODE_SUCCESS) { - code = TSDB_CODE_ACTION_IN_PROGRESS; - } + int32_t code = tscProcessSql(pNew); + if (code == TSDB_CODE_SUCCESS) { + code = TSDB_CODE_ACTION_IN_PROGRESS; } return code; } -int tscGetMeterMeta(SSqlObj *pSql, SMeterMetaInfo *pMeterMetaInfo) { - assert(strlen(pMeterMetaInfo->name) != 0); +int32_t tscGetTableMeta(SSqlObj *pSql, STableMetaInfo *pTableMetaInfo) { + assert(strlen(pTableMetaInfo->name) != 0); - // If this SMeterMetaInfo owns a metermeta, release it first - if (pMeterMetaInfo->pMeterMeta != NULL) { - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMeterMeta), false); + // If this STableMetaInfo owns a metermeta, release it first + if (pTableMetaInfo->pTableMeta != NULL) { + taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pTableMeta), false); } - pMeterMetaInfo->pMeterMeta = (STableMeta *)taosGetDataFromCache(tscCacheHandle, pMeterMetaInfo->name); - if (pMeterMetaInfo->pMeterMeta != NULL) { - STableMeta *pMeterMeta = pMeterMetaInfo->pMeterMeta; - - tscTrace("%p retrieve tableMeta from cache, the number of columns:%d, numOfTags:%d", pSql, pMeterMeta->numOfColumns, - pMeterMeta->numOfTags); + pTableMetaInfo->pTableMeta = (STableMeta *)taosCacheAcquireByName(tscCacheHandle, pTableMetaInfo->name); + if (pTableMetaInfo->pTableMeta != NULL) { + STableComInfo tinfo = 
tscGetTableInfo(pTableMetaInfo->pTableMeta); + tscTrace("%p retrieve tableMeta from cache, the number of columns:%d, numOfTags:%d", pSql, tinfo.numOfColumns, + tinfo.numOfTags); return TSDB_CODE_SUCCESS; } /* * for async insert operation, release data block buffer before issue new object to get metermeta - * because in metermeta callback function, the tscParse function will generate the submit data blocks + * because in table meta callback function, the tscParse function will generate the submit data blocks */ - return doGetMeterMetaFromServer(pSql, pMeterMetaInfo); + return doGetMeterMetaFromServer(pSql, pTableMetaInfo); } -int tscGetMeterMetaEx(SSqlObj *pSql, SMeterMetaInfo *pMeterMetaInfo, bool createIfNotExists) { - pSql->cmd.createOnDemand = createIfNotExists; - return tscGetMeterMeta(pSql, pMeterMetaInfo); +int tscGetMeterMetaEx(SSqlObj *pSql, STableMetaInfo *pTableMetaInfo, bool createIfNotExists) { + pSql->cmd.autoCreated = createIfNotExists; + return tscGetTableMeta(pSql, pTableMetaInfo); } /* @@ -3228,35 +2476,26 @@ int tscRenewMeterMeta(SSqlObj *pSql, char *tableId) { SSqlCmd *pCmd = &pSql->cmd; SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - - // enforce the renew metermeta operation in async model - if (pSql->fp == NULL) pSql->fp = (void *)0x1; + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); /* * 1. only update the metermeta in force model metricmeta is not updated * 2. 
if get metermeta failed, still get the metermeta */ - if (pMeterMetaInfo->pMeterMeta == NULL || !tscQueryOnMetric(pCmd)) { - if (pMeterMetaInfo->pMeterMeta) { + if (pTableMetaInfo->pTableMeta == NULL || !tscQueryOnMetric(pCmd)) { + if (pTableMetaInfo->pTableMeta) { tscTrace("%p update meter meta, old: numOfTags:%d, numOfCols:%d, uid:%" PRId64 ", addr:%p", pSql, - pMeterMetaInfo->numOfTags, pCmd->numOfCols, pMeterMetaInfo->pMeterMeta->uid, pMeterMetaInfo->pMeterMeta); + pTableMetaInfo->numOfTags, pCmd->numOfCols, pTableMetaInfo->pTableMeta->uid, pTableMetaInfo->pTableMeta); } tscWaitingForCreateTable(pCmd); - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMeterMeta), true); + taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pTableMeta), true); - code = doGetMeterMetaFromServer(pSql, pMeterMetaInfo); // todo ?? + code = doGetMeterMetaFromServer(pSql, pTableMetaInfo); // todo ?? } else { tscTrace("%p metric query not update metric meta, numOfTags:%d, numOfCols:%d, uid:%" PRId64 ", addr:%p", pSql, - pMeterMetaInfo->pMeterMeta->numOfTags, pCmd->numOfCols, pMeterMetaInfo->pMeterMeta->uid, - pMeterMetaInfo->pMeterMeta); - } - - if (code != TSDB_CODE_ACTION_IN_PROGRESS) { - if (pSql->fp == (void *)0x1) { - pSql->fp = NULL; - } + tscGetNumOfTags(pTableMetaInfo->pTableMeta), pCmd->numOfCols, pTableMetaInfo->pTableMeta->uid, + pTableMetaInfo->pTableMeta); } return code; @@ -3275,17 +2514,17 @@ int tscGetMetricMeta(SSqlObj *pSql, int32_t clauseIndex) { for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { char tagstr[TSDB_MAX_TAGS_LEN + 1] = {0}; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); - tscGetMetricMetaCacheKey(pQueryInfo, tagstr, pMeterMetaInfo->pMeterMeta->uid); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, i); + tscGetMetricMetaCacheKey(pQueryInfo, tagstr, pTableMetaInfo->pTableMeta->uid); - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMetricMeta), false); + 
taosCacheRelease(tscCacheHandle, (void **)&(pTableMetaInfo->pMetricMeta), false); - SSuperTableMeta *ppMeta = (SSuperTableMeta *)taosGetDataFromCache(tscCacheHandle, tagstr); + SSuperTableMeta *ppMeta = (SSuperTableMeta *)taosCacheAcquireByName(tscCacheHandle, tagstr); if (ppMeta == NULL) { required = true; break; } else { - pMeterMetaInfo->pMetricMeta = ppMeta; + pTableMetaInfo->pMetricMeta = ppMeta; } } @@ -3306,10 +2545,10 @@ int tscGetMetricMeta(SSqlObj *pSql, int32_t clauseIndex) { } for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { - SMeterMetaInfo *pMMInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); + STableMetaInfo *pMMInfo = tscGetMetaInfo(pQueryInfo, i); - STableMeta *pMeterMeta = taosGetDataFromCache(tscCacheHandle, pMMInfo->name); - tscAddMeterMetaInfo(pNewQueryInfo, pMMInfo->name, pMeterMeta, NULL, pMMInfo->numOfTags, pMMInfo->tagColumnIndex); + STableMeta *pTableMeta = taosCacheAcquireByName(tscCacheHandle, pMMInfo->name); + tscAddMeterMetaInfo(pNewQueryInfo, pMMInfo->name, pTableMeta, NULL, pMMInfo->numOfTags, pMMInfo->tagColumnIndex); } if ((code = tscAllocPayload(&pNew->cmd, TSDB_DEFAULT_PAYLOAD_SIZE)) != TSDB_CODE_SUCCESS) { @@ -3336,42 +2575,17 @@ int tscGetMetricMeta(SSqlObj *pSql, int32_t clauseIndex) { // } tscTrace("%p allocate new pSqlObj:%p to get metricMeta", pSql, pNew); - if (pSql->fp == NULL) { - tsem_init(&pNew->rspSem, 0, 0); - tsem_init(&pNew->emptyRspSem, 0, 1); - - code = tscProcessSql(pNew); - - if (code == TSDB_CODE_SUCCESS) {//todo optimize the performance - for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { - char tagstr[TSDB_MAX_TAGS_LEN] = {0}; - - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, i); - tscGetMetricMetaCacheKey(pQueryInfo, tagstr, pMeterMetaInfo->pMeterMeta->uid); - -#ifdef _DEBUG_VIEW - printf("create metric key:%s, index:%d\n", tagstr, i); -#endif - - taosRemoveDataFromCache(tscCacheHandle, (void **)&(pMeterMetaInfo->pMetricMeta), false); - 
pMeterMetaInfo->pMetricMeta = (SSuperTableMeta *)taosGetDataFromCache(tscCacheHandle, tagstr); - } - } - - tscFreeSqlObj(pNew); - } else { - pNew->fp = tscMeterMetaCallBack; - pNew->param = pSql; - code = tscProcessSql(pNew); - if (code == TSDB_CODE_SUCCESS) { - code = TSDB_CODE_ACTION_IN_PROGRESS; - } + pNew->fp = tscTableMetaCallBack; + pNew->param = pSql; + code = tscProcessSql(pNew); + if (code == TSDB_CODE_SUCCESS) { + code = TSDB_CODE_ACTION_IN_PROGRESS; } return code; } -void tscInitMsgs() { +void tscInitMsgsFp() { tscBuildMsg[TSDB_SQL_SELECT] = tscBuildQueryMsg; tscBuildMsg[TSDB_SQL_INSERT] = tscBuildSubmitMsg; tscBuildMsg[TSDB_SQL_FETCH] = tscBuildRetrieveMsg; @@ -3396,7 +2610,7 @@ void tscInitMsgs() { tscBuildMsg[TSDB_SQL_CONNECT] = tscBuildConnectMsg; tscBuildMsg[TSDB_SQL_USE_DB] = tscBuildUseDbMsg; - tscBuildMsg[TSDB_SQL_META] = tscBuildMeterMetaMsg; + tscBuildMsg[TSDB_SQL_META] = tscBuildTableMetaMsg; tscBuildMsg[TSDB_SQL_METRIC] = tscBuildMetricMetaMsg; tscBuildMsg[TSDB_SQL_MULTI_META] = tscBuildMultiMeterMetaMsg; @@ -3414,7 +2628,7 @@ void tscInitMsgs() { tscProcessMsgRsp[TSDB_SQL_DROP_TABLE] = tscProcessDropTableRsp; tscProcessMsgRsp[TSDB_SQL_CONNECT] = tscProcessConnectRsp; tscProcessMsgRsp[TSDB_SQL_USE_DB] = tscProcessUseDbRsp; - tscProcessMsgRsp[TSDB_SQL_META] = tscProcessMeterMetaRsp; + tscProcessMsgRsp[TSDB_SQL_META] = tscProcessTableMetaRsp; tscProcessMsgRsp[TSDB_SQL_METRIC] = tscProcessMetricMetaRsp; tscProcessMsgRsp[TSDB_SQL_MULTI_META] = tscProcessMultiMeterMetaRsp; diff --git a/src/client/src/tscSql.c b/src/client/src/tscSql.c index 465263a439773f59fc07bbfe8452c3dd521d6d1f..5d93e44c7712910c8a34805def9dfe9c2ed0aa82 100644 --- a/src/client/src/tscSql.c +++ b/src/client/src/tscSql.c @@ -13,51 +13,57 @@ * along with this program. If not, see . 
*/ -#include #include "hash.h" #include "os.h" +#include "qast.h" #include "tcache.h" #include "tlog.h" #include "tnote.h" #include "trpc.h" -#include "tscJoinProcess.h" #include "tscProfile.h" -#include "tscSQLParser.h" #include "tscSecondaryMerge.h" +#include "tscSubquery.h" #include "tscUtil.h" #include "tsclient.h" #include "tscompression.h" #include "tsocket.h" #include "ttimer.h" +#include "ttokendef.h" #include "tutil.h" -TAOS *taos_connect_imp(const char *ip, const char *user, const char *pass, const char *db, uint16_t port, - void (*fp)(void *, TAOS_RES *, int), void *param, void **taos) { - STscObj *pObj; +static bool validImpl(const char* str, size_t maxsize) { + if (str == NULL) { + return false; + } + + size_t len = strlen(str); + if (len <= 0 || len > maxsize) { + return false; + } + + return true; +} +static bool validUserName(const char* user) { + return validImpl(user, TSDB_USER_LEN); +} - taos_init(); +static bool validPassword(const char* passwd) { + return validImpl(passwd, TSDB_PASSWORD_LEN); +} - if (user == NULL) { +STscObj *taosConnectImpl(const char *ip, const char *user, const char *pass, const char *db, uint16_t port, + void (*fp)(void *, TAOS_RES *, int), void *param, void **taos) { + taos_init(); + + if (!validUserName(user)) { globalCode = TSDB_CODE_INVALID_ACCT; return NULL; - } else { - size_t len = strlen(user); - if (len <= 0 || len > TSDB_USER_LEN) { - globalCode = TSDB_CODE_INVALID_ACCT; - return NULL; - } } - if (pass == NULL) { + if (!validPassword(pass)) { globalCode = TSDB_CODE_INVALID_PASS; return NULL; - } else { - size_t len = strlen(pass); - if (len <= 0 || len > TSDB_KEY_LEN) { - globalCode = TSDB_CODE_INVALID_PASS; - return NULL; - } } if (tscInitRpc(user, pass) != 0) { @@ -67,7 +73,7 @@ TAOS *taos_connect_imp(const char *ip, const char *user, const char *pass, const if (ip && ip[0]) { tscMgmtIpList.inUse = 0; - tscMgmtIpList.port = tsMgmtShellPort; + tscMgmtIpList.port = tsMnodeShellPort; tscMgmtIpList.numOfIps = 1; 
tscMgmtIpList.ip[0] = inet_addr(ip); @@ -82,20 +88,19 @@ TAOS *taos_connect_imp(const char *ip, const char *user, const char *pass, const } } - tscMgmtIpList.port = port ? port : tsMgmtShellPort; - - pObj = (STscObj *)malloc(sizeof(STscObj)); + tscMgmtIpList.port = port ? port : tsMnodeShellPort; + + STscObj *pObj = (STscObj *)calloc(1, sizeof(STscObj)); if (NULL == pObj) { globalCode = TSDB_CODE_CLI_OUT_OF_MEMORY; return NULL; } - memset(pObj, 0, sizeof(STscObj)); pObj->signature = pObj; strncpy(pObj->user, user, TSDB_USER_LEN); taosEncryptPass((uint8_t *)pass, strlen(pass), pObj->pass); - pObj->mgmtPort = port ? port : tsMgmtShellPort; + pObj->mgmtPort = port ? port : tsMnodeShellPort; if (db) { int32_t len = strlen(db); @@ -115,18 +120,17 @@ TAOS *taos_connect_imp(const char *ip, const char *user, const char *pass, const pthread_mutex_init(&pObj->mutex, NULL); - SSqlObj *pSql = (SSqlObj *)malloc(sizeof(SSqlObj)); + SSqlObj *pSql = (SSqlObj *)calloc(1, sizeof(SSqlObj)); if (NULL == pSql) { globalCode = TSDB_CODE_CLI_OUT_OF_MEMORY; free(pObj); return NULL; } - memset(pSql, 0, sizeof(SSqlObj)); pSql->pTscObj = pObj; pSql->signature = pSql; tsem_init(&pSql->rspSem, 0, 0); - tsem_init(&pSql->emptyRspSem, 0, 1); + pObj->pSql = pSql; pSql->fp = fp; pSql->param = param; @@ -142,46 +146,71 @@ TAOS *taos_connect_imp(const char *ip, const char *user, const char *pass, const return NULL; } - pSql->res.code = tscProcessSql(pSql); - if (fp != NULL) { - tscTrace("%p DB async connection is opening", pObj); - return pObj; - } - - if (pSql->res.code) { - taos_close(pObj); - return NULL; - } - - tscTrace("%p DB connection is opened", pObj); + // tsRpcHeaderSize will be updated during RPC initialization, so only after it initialization, this value is valid + tsInsertHeadSize = tsRpcHeadSize + sizeof(SShellSubmitMsg); return pObj; } +static void syncConnCallback(void *param, TAOS_RES *tres, int code) { + STscObj *pObj = (STscObj *)param; + assert(pObj != NULL && pObj->pSql != NULL); 
+ + sem_post(&pObj->pSql->rspSem); +} + TAOS *taos_connect(const char *ip, const char *user, const char *pass, const char *db, uint16_t port) { if (ip == NULL || (ip != NULL && (strcmp("127.0.0.1", ip) == 0 || strcasecmp("localhost", ip) == 0))) { ip = tsMasterIp; } + tscTrace("try to create a connection to %s", ip); - void *taos = taos_connect_imp(ip, user, pass, db, port, NULL, NULL, NULL); - if (taos != NULL) { - STscObj *pObj = (STscObj *)taos; + STscObj *pObj = taosConnectImpl(ip, user, pass, db, port, NULL, NULL, NULL); + if (pObj != NULL) { + SSqlObj* pSql = pObj->pSql; + assert(pSql != NULL); + + pSql->fp = syncConnCallback; + pSql->param = pObj; + + tscProcessSql(pSql); + sem_wait(&pSql->rspSem); + + if (pSql->res.code != TSDB_CODE_SUCCESS) { + taos_close(pObj); + return NULL; + } + + tscTrace("%p DB connection is opening", pObj); // version compare only requires the first 3 segments of the version string - int code = taosCheckVersion(version, taos_get_server_info(taos), 3); + int code = taosCheckVersion(version, taos_get_server_info(pObj), 3); if (code != 0) { - pObj->pSql->res.code = code; - taos_close(taos); + pSql->res.code = code; + + taos_close(pObj); return NULL; + } else { + return pObj; } } - return taos; + return NULL; } TAOS *taos_connect_a(char *ip, char *user, char *pass, char *db, uint16_t port, void (*fp)(void *, TAOS_RES *, int), void *param, void **taos) { - return taos_connect_imp(ip, user, pass, db, port, fp, param, taos); + STscObj* pObj = taosConnectImpl(ip, user, pass, db, port, fp, param, taos); + if (pObj == NULL) { + return NULL; + } + + SSqlObj* pSql = pObj->pSql; + + pSql->res.code = tscProcessSql(pSql); + tscTrace("%p DB async connection is opening", pObj); + + return pObj; } void taos_close(TAOS *taos) { @@ -206,7 +235,7 @@ int taos_query_imp(STscObj *pObj, SSqlObj *pSql) { pSql->asyncTblPos = NULL; if (NULL != pSql->pTableHashList) { - taosCleanUpHashTable(pSql->pTableHashList); + taosHashCleanup(pSql->pTableHashList); 
pSql->pTableHashList = NULL; } @@ -408,14 +437,14 @@ static char *getArithemicInputSrc(void *param, char *name, int32_t colId) { SSqlFunctionExpr * pExpr = pSupport->pExpr; int32_t index = -1; - for (int32_t i = 0; i < pExpr->pBinExprInfo.numOfCols; ++i) { - if (strcmp(name, pExpr->pBinExprInfo.pReqColumns[i].name) == 0) { + for (int32_t i = 0; i < pExpr->binExprInfo.numOfCols; ++i) { + if (strcmp(name, pExpr->binExprInfo.pReqColumns[i].name) == 0) { index = i; break; } } - assert(index >= 0 && index < pExpr->pBinExprInfo.numOfCols); + assert(index >= 0 && index < pExpr->binExprInfo.numOfCols); return pSupport->data[index] + pSupport->offset * pSupport->elemSize[index]; } @@ -465,21 +494,21 @@ static void **doSetResultRowData(SSqlObj *pSql) { sas->offset = 0; sas->pExpr = pQueryInfo->fieldsInfo.pExpr[i]; - sas->numOfCols = sas->pExpr->pBinExprInfo.numOfCols; + sas->numOfCols = sas->pExpr->binExprInfo.numOfCols; if (pRes->buffer[i] == NULL) { pRes->buffer[i] = malloc(tscFieldInfoGetField(pQueryInfo, i)->bytes); } for(int32_t k = 0; k < sas->numOfCols; ++k) { - int32_t columnIndex = sas->pExpr->pBinExprInfo.pReqColumns[k].colIdxInBuf; + int32_t columnIndex = sas->pExpr->binExprInfo.pReqColumns[k].colIdxInBuf; SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, columnIndex); sas->elemSize[k] = pExpr->resBytes; sas->data[k] = (pRes->data + pRes->numOfRows* pExpr->offset) + pRes->row*pExpr->resBytes; } - tSQLBinaryExprCalcTraverse(sas->pExpr->pBinExprInfo.pBinExpr, 1, pRes->buffer[i], sas, TSQL_SO_ASC, getArithemicInputSrc); + tSQLBinaryExprCalcTraverse(sas->pExpr->binExprInfo.pBinExpr, 1, pRes->buffer[i], sas, TSQL_SO_ASC, getArithemicInputSrc); pRes->tsrow[i] = pRes->buffer[i]; free(sas); //todo optimization @@ -509,7 +538,7 @@ static bool tscHashRemainDataInSubqueryResultSet(SSqlObj *pSql) { SSqlCmd *pCmd1 = &pSql->pSubs[i]->cmd; SQueryInfo * pQueryInfo1 = tscGetQueryInfoDetail(pCmd1, pCmd1->clauseIndex); - SMeterMetaInfo *pMetaInfo = 
tscGetMeterMetaInfoFromQueryInfo(pQueryInfo1, 0); + STableMetaInfo *pMetaInfo = tscGetMetaInfo(pQueryInfo1, 0); assert(pQueryInfo1->numOfTables == 1); @@ -705,8 +734,10 @@ TAOS_ROW taos_fetch_row(TAOS_RES *res) { } // current data are exhausted, fetch more data - if (pRes->data == NULL || (pRes->data != NULL && pRes->row >= pRes->numOfRows && pCmd->command == TSDB_SQL_RETRIEVE)) { + if (pRes->data == NULL || (pRes->data != NULL && pRes->row >= pRes->numOfRows && + (pCmd->command == TSDB_SQL_RETRIEVE || pCmd->command == TSDB_SQL_RETRIEVE_METRIC || pCmd->command == TSDB_SQL_FETCH))) { taos_fetch_rows_a(res, asyncFetchCallback, pSql->pTscObj); + sem_wait(&pSql->rspSem); } @@ -808,7 +839,7 @@ void taos_free_result_imp(TAOS_RES *res, int keepCmd) { pQueryInfo->type = TSDB_QUERY_TYPE_FREE_RESOURCE; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); /* * case 1. Partial data have been retrieved from vnodes, but not all data has been retrieved yet. @@ -823,7 +854,7 @@ void taos_free_result_imp(TAOS_RES *res, int keepCmd) { if (pRes->code != TSDB_CODE_QUERY_CANCELLED && ((pRes->numOfRows > 0 && pCmd->command < TSDB_SQL_LOCAL) || (pRes->code == TSDB_CODE_SUCCESS && pRes->numOfRows == 0 && pCmd->command == TSDB_SQL_SELECT && - pSql->pStream == NULL && pMeterMetaInfo->pMeterMeta != NULL))) { + pSql->pStream == NULL && pTableMetaInfo->pTableMeta != NULL))) { pCmd->command = (pCmd->command > TSDB_SQL_MGMT) ? 
TSDB_SQL_RETRIEVE : TSDB_SQL_FETCH; tscTrace("%p code:%d, numOfRows:%d, command:%d", pSql, pRes->code, pRes->numOfRows, pCmd->command); @@ -965,7 +996,7 @@ void taos_stop_query(TAOS_RES *res) { pSql->res.code = TSDB_CODE_QUERY_CANCELLED; SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - if (tscIsTwoStageMergeMetricQuery(pQueryInfo, 0)) { + if (tscIsTwoStageSTableQuery(pQueryInfo, 0)) { tscKillMetricQuery(pSql); return; } @@ -1079,7 +1110,7 @@ int taos_validate_sql(TAOS *taos, const char *sql) { pSql->asyncTblPos = NULL; if (NULL != pSql->pTableHashList) { - taosCleanUpHashTable(pSql->pTableHashList); + taosHashCleanup(pSql->pTableHashList); pSql->pTableHashList = NULL; } @@ -1107,7 +1138,7 @@ static int tscParseTblNameList(SSqlObj *pSql, const char *tblNameList, int32_t t SQueryInfo *pQueryInfo = NULL; tscGetQueryInfoDetailSafely(pCmd, pCmd->clauseIndex, &pQueryInfo); - SMeterMetaInfo *pMeterMetaInfo = tscAddEmptyMeterMetaInfo(pQueryInfo); + STableMetaInfo *pTableMetaInfo = tscAddEmptyMetaInfo(pQueryInfo); if ((code = tscAllocPayload(pCmd, tblListLen + 16)) != TSDB_CODE_SUCCESS) { return code; @@ -1142,7 +1173,7 @@ static int tscParseTblNameList(SSqlObj *pSql, const char *tblNameList, int32_t t return code; } - if ((code = setMeterID(pMeterMetaInfo, &sToken, pSql)) != TSDB_CODE_SUCCESS) { + if ((code = setMeterID(pTableMetaInfo, &sToken, pSql)) != TSDB_CODE_SUCCESS) { return code; } @@ -1152,7 +1183,7 @@ static int tscParseTblNameList(SSqlObj *pSql, const char *tblNameList, int32_t t return code; } - if (payloadLen + strlen(pMeterMetaInfo->name) + 128 >= pCmd->allocSize) { + if (payloadLen + strlen(pTableMetaInfo->name) + 128 >= pCmd->allocSize) { char *pNewMem = realloc(pCmd->payload, pCmd->allocSize + tblListLen); if (pNewMem == NULL) { code = TSDB_CODE_CLI_OUT_OF_MEMORY; @@ -1165,7 +1196,7 @@ static int tscParseTblNameList(SSqlObj *pSql, const char *tblNameList, int32_t t pMsg = pCmd->payload; } - payloadLen += sprintf(pMsg + payloadLen, 
"%s,", pMeterMetaInfo->name); + payloadLen += sprintf(pMsg + payloadLen, "%s,", pTableMetaInfo->name); } *(pMsg + payloadLen) = '\0'; diff --git a/src/client/src/tscStream.c b/src/client/src/tscStream.c index 5fd0adf5b15b31abe741dd51025e8e0a5a211230..46e3ac2e60b9e28fa21251c0fbe4a2db3fdd0351 100644 --- a/src/client/src/tscStream.c +++ b/src/client/src/tscStream.c @@ -13,16 +13,16 @@ * along with this program. If not, see . */ +#include #include "os.h" +#include "taosmsg.h" #include "tlog.h" -#include "tscSQLParser.h" +#include "tscUtil.h" +#include "tsched.h" +#include "tsclient.h" #include "ttime.h" #include "ttimer.h" #include "tutil.h" -#include "tsched.h" -#include "taosmsg.h" -#include "tscUtil.h" -#include "tsclient.h" #include "tscProfile.h" @@ -72,14 +72,14 @@ static void tscProcessStreamLaunchQuery(SSchedMsg *pMsg) { pSql->param = pStream; SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); - int code = tscGetMeterMeta(pSql, pMeterMetaInfo); + int code = tscGetTableMeta(pSql, pTableMetaInfo); pSql->res.code = code; if (code == TSDB_CODE_ACTION_IN_PROGRESS) return; - if (code == 0 && UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (code == 0 && UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { code = tscGetMetricMeta(pSql, 0); pSql->res.code = code; @@ -97,7 +97,7 @@ static void tscProcessStreamLaunchQuery(SSchedMsg *pMsg) { return; } - tscTrace("%p stream:%p start stream query on:%s", pSql, pStream, pMeterMetaInfo->name); + tscTrace("%p stream:%p start stream query on:%s", pSql, pStream, pTableMetaInfo->name); tscProcessSql(pStream->pSql); tscIncStreamExecutionCount(pStream); @@ -146,8 +146,8 @@ static void tscProcessStreamQueryCallback(void *param, TAOS_RES *tres, int numOf tscError("%p stream:%p, query data failed, code:%d, retry in %" PRId64 "ms", pStream->pSql, pStream, numOfRows, retryDelay); - 
SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(&pStream->pSql->cmd, 0, 0); - tscClearMeterMetaInfo(pMeterMetaInfo, true); + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pStream->pSql->cmd, 0, 0); + tscClearMeterMetaInfo(pTableMetaInfo, true); tscSetRetryTimer(pStream, pStream->pSql, retryDelay); return; @@ -172,12 +172,12 @@ static void tscSetTimestampForRes(SSqlStream *pStream, SSqlObj *pSql) { static void tscProcessStreamRetrieveResult(void *param, TAOS_RES *res, int numOfRows) { SSqlStream * pStream = (SSqlStream *)param; SSqlObj * pSql = (SSqlObj *)res; - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(&pSql->cmd, 0, 0); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); if (pSql == NULL || numOfRows < 0) { int64_t retryDelayTime = tscGetRetryDelayTime(pStream->slidingTime, pStream->precision); tscError("%p stream:%p, retrieve data failed, code:%d, retry in %" PRId64 "ms", pSql, pStream, numOfRows, retryDelayTime); - tscClearMeterMetaInfo(pMeterMetaInfo, true); + tscClearMeterMetaInfo(pTableMetaInfo, true); tscSetRetryTimer(pStream, pStream->pSql, retryDelayTime); return; @@ -255,11 +255,11 @@ static void tscProcessStreamRetrieveResult(void *param, TAOS_RES *res, int numOf } } - tscTrace("%p stream:%p, query on:%s, fetch result completed, fetched rows:%d", pSql, pStream, pMeterMetaInfo->name, + tscTrace("%p stream:%p, query on:%s, fetch result completed, fetched rows:%d", pSql, pStream, pTableMetaInfo->name, pStream->numOfRes); // release the metric/meter meta information reference, so data in cache can be updated - tscClearMeterMetaInfo(pMeterMetaInfo, false); + tscClearMeterMetaInfo(pTableMetaInfo, false); tscSetNextLaunchTimer(pStream, pSql); } } @@ -540,13 +540,14 @@ TAOS_STREAM *taos_open_stream(TAOS *taos, const char *sqlstr, void (*fp)(void *p } SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, 0); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - + 
STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + pStream->fp = fp; pStream->callback = callback; pStream->param = param; pStream->pSql = pSql; - pStream->precision = pMeterMetaInfo->pMeterMeta->precision; + pStream->precision = tinfo.precision; pStream->ctime = taosGetTimestamp(pStream->precision); pStream->etime = pQueryInfo->etime; @@ -561,7 +562,7 @@ TAOS_STREAM *taos_open_stream(TAOS *taos, const char *sqlstr, void (*fp)(void *p taosTmrReset(tscProcessStreamTimer, starttime, pStream, tscTmr, &pStream->pTimer); tscTrace("%p stream:%p is opened, query on:%s, interval:%" PRId64 ", sliding:%" PRId64 ", first launched in:%" PRId64 ", sql:%s", pSql, - pStream, pMeterMetaInfo->name, pStream->interval, pStream->slidingTime, starttime, sqlstr); + pStream, pTableMetaInfo->name, pStream->interval, pStream->slidingTime, starttime, sqlstr); return pStream; } diff --git a/src/client/src/tscSub.c b/src/client/src/tscSub.c index 3d55ff1c7267adc242ff037e2df8faa423076319..014b0c5cb7fabf70a7d48918fe600a9936536cfd 100644 --- a/src/client/src/tscSub.c +++ b/src/client/src/tscSub.c @@ -175,10 +175,10 @@ int tscUpdateSubscription(STscObj* pObj, SSub* pSub) { return 0; } - SMeterMetaInfo *pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, 0, 0); + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, 0, 0); int numOfTables = 0; - if (!UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { - SSuperTableMeta* pMetricMeta = pMeterMetaInfo->pMetricMeta; + if (!UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { + SSuperTableMeta* pMetricMeta = pTableMetaInfo->pMetricMeta; for (int32_t i = 0; i < pMetricMeta->numOfVnodes; i++) { SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, i); numOfTables += pVnodeSidList->numOfSids; @@ -191,19 +191,19 @@ int tscUpdateSubscription(STscObj* pObj, SSub* pSub) { return 0; } - if (UTIL_METER_IS_NOMRAL_METER(pMeterMetaInfo)) { + if 
(UTIL_TABLE_IS_NOMRAL_TABLE(pTableMetaInfo)) { numOfTables = 1; - int64_t uid = pMeterMetaInfo->pMeterMeta->uid; + int64_t uid = pTableMetaInfo->pTableMeta->uid; progress[0].uid = uid; progress[0].key = tscGetSubscriptionProgress(pSub, uid); } else { - SSuperTableMeta* pMetricMeta = pMeterMetaInfo->pMetricMeta; + SSuperTableMeta* pMetricMeta = pTableMetaInfo->pMetricMeta; numOfTables = 0; for (int32_t i = 0; i < pMetricMeta->numOfVnodes; i++) { SVnodeSidList *pVnodeSidList = tscGetVnodeSidList(pMetricMeta, i); for (int32_t j = 0; j < pVnodeSidList->numOfSids; j++) { - STableSidExtInfo *pMeterInfo = tscGetMeterSidInfo(pVnodeSidList, j); - int64_t uid = pMeterInfo->uid; + STableIdInfo *pTableMetaInfo = tscGetMeterSidInfo(pVnodeSidList, j); + int64_t uid = pTableMetaInfo->uid; progress[numOfTables].uid = uid; progress[numOfTables++].key = tscGetSubscriptionProgress(pSub, uid); } @@ -371,7 +371,7 @@ TAOS_RES *taos_consume(TAOS_SUB *tsub) { pSql->sqlstr = NULL; taos_free_result_imp(pSql, 0); pSql->sqlstr = sqlstr; - taosClearDataCache(tscCacheHandle); + taosCacheEmpty(tscCacheHandle); if (!tscUpdateSubscription(pSub->taos, pSub)) return NULL; tscTrace("meter synchronization completed"); } else { @@ -385,7 +385,7 @@ TAOS_RES *taos_consume(TAOS_SUB *tsub) { pSql->cmd.command = TSDB_SQL_SELECT; pQueryInfo->type = type; - tscGetMeterMetaInfo(&pSql->cmd, 0, 0)->vnodeIndex = 0; + tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0)->vnodeIndex = 0; } tscDoQuery(pSql); diff --git a/src/client/src/tscSubquery.c b/src/client/src/tscSubquery.c new file mode 100644 index 0000000000000000000000000000000000000000..1df977d99626cec34ac3750eb668244dd06c33d4 --- /dev/null +++ b/src/client/src/tscSubquery.c @@ -0,0 +1,1572 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "tscSubquery.h" +#include "os.h" +#include "qtsbuf.h" +#include "tsclient.h" + +typedef struct SInsertSupporter { + SSubqueryState* pState; + SSqlObj* pSql; +} SInsertSupporter; + +static void freeSubqueryObj(SSqlObj* pSql); + +static bool doCompare(int32_t order, int64_t left, int64_t right) { + if (order == TSQL_SO_ASC) { + return left < right; + } else { + return left > right; + } +} + +static int64_t doTSBlockIntersect(SSqlObj* pSql, SJoinSubquerySupporter* pSupporter1, + SJoinSubquerySupporter* pSupporter2, TSKEY* st, TSKEY* et) { + STSBuf* output1 = tsBufCreate(true); + STSBuf* output2 = tsBufCreate(true); + + *st = INT64_MAX; + *et = INT64_MIN; + + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, pSql->cmd.clauseIndex); + + SLimitVal* pLimit = &pQueryInfo->limit; + int32_t order = pQueryInfo->order.order; + + SQueryInfo* pSubQueryInfo1 = tscGetQueryInfoDetail(&pSql->pSubs[0]->cmd, 0); + SQueryInfo* pSubQueryInfo2 = tscGetQueryInfoDetail(&pSql->pSubs[1]->cmd, 0); + + pSubQueryInfo1->tsBuf = output1; + pSubQueryInfo2->tsBuf = output2; + + tsBufResetPos(pSupporter1->pTSBuf); + tsBufResetPos(pSupporter2->pTSBuf); + + // TODO add more details information + if (!tsBufNextPos(pSupporter1->pTSBuf)) { + tsBufFlush(output1); + tsBufFlush(output2); + + tscTrace("%p input1 is empty, 0 for secondary query after ts blocks intersecting", pSql); + return 0; + } + + if (!tsBufNextPos(pSupporter2->pTSBuf)) { + tsBufFlush(output1); + tsBufFlush(output2); + + tscTrace("%p input2 is empty, 0 for secondary query after ts blocks intersecting", pSql); + return 0; + } + + int64_t numOfInput1 = 1; + int64_t numOfInput2 = 1; + + while (1) { + 
STSElem elem1 = tsBufGetElem(pSupporter1->pTSBuf); + STSElem elem2 = tsBufGetElem(pSupporter2->pTSBuf); + +#ifdef _DEBUG_VIEW + // for debug purpose + tscPrint("%" PRId64 ", tags:%d \t %" PRId64 ", tags:%d", elem1.ts, elem1.tag, elem2.ts, elem2.tag); +#endif + + if (elem1.tag < elem2.tag || (elem1.tag == elem2.tag && doCompare(order, elem1.ts, elem2.ts))) { + if (!tsBufNextPos(pSupporter1->pTSBuf)) { + break; + } + + numOfInput1++; + } else if (elem1.tag > elem2.tag || (elem1.tag == elem2.tag && doCompare(order, elem2.ts, elem1.ts))) { + if (!tsBufNextPos(pSupporter2->pTSBuf)) { + break; + } + + numOfInput2++; + } else { + /* + * in case of stable query, limit/offset is not applied here. the limit/offset is applied to the + * final results which is acquired after the secondry merge of in the client. + */ + if (pLimit->offset == 0 || pQueryInfo->intervalTime > 0 || QUERY_IS_STABLE_QUERY(pQueryInfo->type)) { + if (*st > elem1.ts) { + *st = elem1.ts; + } + + if (*et < elem1.ts) { + *et = elem1.ts; + } + + tsBufAppend(output1, elem1.vnode, elem1.tag, (const char*)&elem1.ts, sizeof(elem1.ts)); + tsBufAppend(output2, elem2.vnode, elem2.tag, (const char*)&elem2.ts, sizeof(elem2.ts)); + } else { + pLimit->offset -= 1; + } + + if (!tsBufNextPos(pSupporter1->pTSBuf)) { + break; + } + + numOfInput1++; + + if (!tsBufNextPos(pSupporter2->pTSBuf)) { + break; + } + + numOfInput2++; + } + } + + /* + * failed to set the correct ts order yet in two cases: + * 1. only one element + * 2. only one element for each tag. 
+ */ + if (output1->tsOrder == -1) { + output1->tsOrder = TSQL_SO_ASC; + output2->tsOrder = TSQL_SO_ASC; + } + + tsBufFlush(output1); + tsBufFlush(output2); + + tsBufDestory(pSupporter1->pTSBuf); + tsBufDestory(pSupporter2->pTSBuf); + + tscTrace("%p input1:%" PRId64 ", input2:%" PRId64 ", final:%" PRId64 " for secondary query after ts blocks " + "intersecting, skey:%" PRId64 ", ekey:%" PRId64, pSql, + numOfInput1, numOfInput2, output1->numOfTotal, *st, *et); + + return output1->numOfTotal; +} + +// todo handle failed to create sub query +SJoinSubquerySupporter* tscCreateJoinSupporter(SSqlObj* pSql, SSubqueryState* pState, int32_t index) { + SJoinSubquerySupporter* pSupporter = calloc(1, sizeof(SJoinSubquerySupporter)); + if (pSupporter == NULL) { + return NULL; + } + + pSupporter->pObj = pSql; + pSupporter->pState = pState; + + pSupporter->subqueryIndex = index; + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, pSql->cmd.clauseIndex); + + pSupporter->interval = pQueryInfo->intervalTime; + pSupporter->limit = pQueryInfo->limit; + + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, pSql->cmd.clauseIndex, index); + pSupporter->uid = pTableMetaInfo->pTableMeta->uid; + + assert (pSupporter->uid != 0); + + getTmpfilePath("join-", pSupporter->path); + pSupporter->f = fopen(pSupporter->path, "w"); + + if (pSupporter->f == NULL) { + tscError("%p failed to create tmp file:%s, reason:%s", pSql, pSupporter->path, strerror(errno)); + } + + return pSupporter; +} + +void tscDestroyJoinSupporter(SJoinSubquerySupporter* pSupporter) { + if (pSupporter == NULL) { + return; + } + + tscSqlExprInfoDestroy(&pSupporter->exprsInfo); + tscColumnBaseInfoDestroy(&pSupporter->colList); + + tscClearFieldInfo(&pSupporter->fieldsInfo); + + if (pSupporter->f != NULL) { + fclose(pSupporter->f); + unlink(pSupporter->path); + } + + tscTagCondRelease(&pSupporter->tagCond); + free(pSupporter); +} + +/* + * need the secondary query process + * In case of 
count(ts)/count(*)/spread(ts) query, that are only applied to + * primary timestamp column , the secondary query is not necessary + * + */ +bool needSecondaryQuery(SQueryInfo* pQueryInfo) { + for (int32_t i = 0; i < pQueryInfo->colList.numOfCols; ++i) { + SColumnBase* pBase = tscColumnBaseInfoGet(&pQueryInfo->colList, i); + if (pBase->colIndex.columnIndex != PRIMARYKEY_TIMESTAMP_COL_INDEX) { + return true; + } + } + + return false; +} + +/* + * launch secondary stage query to fetch the result that contains timestamp in set + */ +int32_t tscLaunchSecondPhaseSubqueries(SSqlObj* pSql) { + int32_t numOfSub = 0; + SJoinSubquerySupporter* pSupporter = NULL; + + /* + * If the columns are not involved in the final select clause, the secondary query will not be launched + * for the subquery. + */ + SSubqueryState* pState = NULL; + + for (int32_t i = 0; i < pSql->numOfSubs; ++i) { + pSupporter = pSql->pSubs[i]->param; + if (pSupporter->exprsInfo.numOfExprs > 0) { + ++numOfSub; + } + } + + assert(numOfSub > 0); + + // scan all subquery, if one sub query has only ts, ignore it + tscTrace("%p start to launch secondary subqueries, total:%d, only:%d needs to query, others are not retrieve in " + "select clause", pSql, pSql->numOfSubs, numOfSub); + + /* + * the subqueries that do not actually launch the secondary query to virtual node is set as completed. 
+ */ + pState = pSupporter->pState; + pState->numOfTotal = pSql->numOfSubs; + pState->numOfCompleted = (pSql->numOfSubs - numOfSub); + + bool success = true; + + for (int32_t i = 0; i < pSql->numOfSubs; ++i) { + SSqlObj *pPrevSub = pSql->pSubs[i]; + pSql->pSubs[i] = NULL; + + pSupporter = pPrevSub->param; + + if (pSupporter->exprsInfo.numOfExprs == 0) { + tscTrace("%p subIndex: %d, not need to launch query, ignore it", pSql, i); + + tscDestroyJoinSupporter(pSupporter); + tscFreeSqlObj(pPrevSub); + + pSql->pSubs[i] = NULL; + continue; + } + + SQueryInfo *pSubQueryInfo = tscGetQueryInfoDetail(&pPrevSub->cmd, 0); + STSBuf *pTSBuf = pSubQueryInfo->tsBuf; + pSubQueryInfo->tsBuf = NULL; + + // free result for async object will also free sqlObj + assert(pSubQueryInfo->exprsInfo.numOfExprs == 1); // ts_comp query only requires one resutl columns + taos_free_result(pPrevSub); + + SSqlObj *pNew = createSubqueryObj(pSql, (int16_t) i, tscJoinQueryCallback, pSupporter, NULL); + if (pNew == NULL) { + tscDestroyJoinSupporter(pSupporter); + success = false; + break; + } + + tscClearSubqueryInfo(&pNew->cmd); + pSql->pSubs[i] = pNew; + + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); + pQueryInfo->tsBuf = pTSBuf; // transfer the ownership of timestamp comp-z data to the new created object + + // set the second stage sub query for join process + pQueryInfo->type |= TSDB_QUERY_TYPE_JOIN_SEC_STAGE; + + pQueryInfo->intervalTime = pSupporter->interval; + pQueryInfo->groupbyExpr = pSupporter->groupbyExpr; + + tscColumnBaseInfoCopy(&pQueryInfo->colList, &pSupporter->colList, 0); + tscTagCondCopy(&pQueryInfo->tagCond, &pSupporter->tagCond); + + tscSqlExprCopy(&pQueryInfo->exprsInfo, &pSupporter->exprsInfo, pSupporter->uid, false); + tscFieldInfoCopyAll(&pQueryInfo->fieldsInfo, &pSupporter->fieldsInfo); + + pSupporter->exprsInfo.numOfExprs = 0; + pSupporter->fieldsInfo.numOfOutputCols = 0; + + /* + * if the first column of the secondary query is not ts function, add this 
function. + * Because this column is required to filter with timestamp after intersecting. + */ + if (pSupporter->exprsInfo.pExprs[0]->functionId != TSDB_FUNC_TS) { + tscAddTimestampColumn(pQueryInfo, TSDB_FUNC_TS, 0); + } + + SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); + assert(pNew->numOfSubs == 0 && pNew->cmd.numOfClause == 1 && pNewQueryInfo->numOfTables == 1); + + tscFieldInfoCalOffset(pNewQueryInfo); + + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pNewQueryInfo, 0); + + /* + * When handling the projection query, the offset value will be modified for table-table join, which is changed + * during the timestamp intersection. + */ + pSupporter->limit = pQueryInfo->limit; + pNewQueryInfo->limit = pSupporter->limit; + + // fetch the join tag column + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { + SSqlExpr *pExpr = tscSqlExprGet(pNewQueryInfo, 0); + assert(pQueryInfo->tagCond.joinInfo.hasJoin); + + int16_t tagColIndex = tscGetJoinTagColIndexByUid(&pQueryInfo->tagCond, pTableMetaInfo->pTableMeta->uid); + pExpr->param[0].i64Key = tagColIndex; + pExpr->numOfParams = 1; + } + + tscPrintSelectClause(pNew, 0); + + tscTrace("%p subquery:%p tableIndex:%d, vnodeIdx:%d, type:%d, exprInfo:%d, colList:%d, fieldsInfo:%d, name:%s", + pSql, pNew, 0, pTableMetaInfo->vnodeIndex, pNewQueryInfo->type, + pNewQueryInfo->exprsInfo.numOfExprs, pNewQueryInfo->colList.numOfCols, + pNewQueryInfo->fieldsInfo.numOfOutputCols, pNewQueryInfo->pTableMetaInfo[0]->name); + } + + //prepare the subqueries object failed, abort + if (!success) { + pSql->res.code = TSDB_CODE_CLI_OUT_OF_MEMORY; + tscError("%p failed to prepare subqueries objs for secondary phase query, numOfSub:%d, code:%d", pSql, + pSql->numOfSubs, pSql->res.code); + freeSubqueryObj(pSql); + + return pSql->res.code; + } + + for(int32_t i = 0; i < pSql->numOfSubs; ++i) { + SSqlObj* pSub = pSql->pSubs[i]; + if (pSub == NULL) { + continue; + } + + tscProcessSql(pSub); + } + + return TSDB_CODE_SUCCESS; +} + 
+void freeSubqueryObj(SSqlObj* pSql) { + SSubqueryState* pState = NULL; + + for (int32_t i = 0; i < pSql->numOfSubs; ++i) { + if (pSql->pSubs[i] != NULL) { + SJoinSubquerySupporter* p = pSql->pSubs[i]->param; + pState = p->pState; + + tscDestroyJoinSupporter(p); + + if (pSql->pSubs[i]->res.code == TSDB_CODE_SUCCESS) { + taos_free_result(pSql->pSubs[i]); + } + } + } + + tfree(pState); + pSql->numOfSubs = 0; +} + +static void doQuitSubquery(SSqlObj* pParentSql) { + freeSubqueryObj(pParentSql); + + tsem_wait(&pParentSql->emptyRspSem); + tsem_wait(&pParentSql->emptyRspSem); + + tsem_post(&pParentSql->rspSem); +} + +static void quitAllSubquery(SSqlObj* pSqlObj, SJoinSubquerySupporter* pSupporter) { + int32_t numOfTotal = pSupporter->pState->numOfTotal; + int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); + + if (finished >= numOfTotal) { + pSqlObj->res.code = abs(pSupporter->pState->code); + tscError("%p all subquery return and query failed, global code:%d", pSqlObj, pSqlObj->res.code); + + doQuitSubquery(pSqlObj); + } +} + +// update the query time range according to the join results on timestamp +static void updateQueryTimeRange(SQueryInfo* pQueryInfo, int64_t st, int64_t et) { + assert(pQueryInfo->stime <= st && pQueryInfo->etime >= et); + + pQueryInfo->stime = st; + pQueryInfo->etime = et; +} + +static void joinRetrieveCallback(void* param, TAOS_RES* tres, int numOfRows) { + SJoinSubquerySupporter* pSupporter = (SJoinSubquerySupporter*)param; + SSqlObj* pParentSql = pSupporter->pObj; + + SSqlObj* pSql = (SSqlObj*)tres; + SSqlCmd* pCmd = &pSql->cmd; + SSqlRes* pRes = &pSql->res; + + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); + + if ((pQueryInfo->type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) == 0) { + if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { + tscError("%p abort query due to other subquery failure. 
code:%d, global code:%d", pSql, numOfRows, + pSupporter->pState->code); + + quitAllSubquery(pParentSql, pSupporter); + return; + } + + if (numOfRows > 0) { // write the data into disk + fwrite(pSql->res.data, pSql->res.numOfRows, 1, pSupporter->f); + fclose(pSupporter->f); + + STSBuf* pBuf = tsBufCreateFromFile(pSupporter->path, true); + if (pBuf == NULL) { + tscError("%p invalid ts comp file from vnode, abort sub query, file size:%d", pSql, numOfRows); + + pSupporter->pState->code = TSDB_CODE_APP_ERROR; // todo set the informative code + quitAllSubquery(pParentSql, pSupporter); + return; + } + + if (pSupporter->pTSBuf == NULL) { + tscTrace("%p create tmp file for ts block:%s", pSql, pBuf->path); + pSupporter->pTSBuf = pBuf; + } else { + assert(pQueryInfo->numOfTables == 1); // for subquery, only one metermetaInfo + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + + tsBufMerge(pSupporter->pTSBuf, pBuf, pTableMetaInfo->vnodeIndex); + tsBufDestory(pBuf); + } + + // open new file to save the result + getTmpfilePath("ts-join", pSupporter->path); + pSupporter->f = fopen(pSupporter->path, "w"); + pSql->res.row = pSql->res.numOfRows; + + taos_fetch_rows_a(tres, joinRetrieveCallback, param); + } else if (numOfRows == 0) { // no data from this vnode anymore + SQueryInfo* pParentQueryInfo = tscGetQueryInfoDetail(&pParentSql->cmd, pParentSql->cmd.clauseIndex); + + //todo refactor + if (tscNonOrderedProjectionQueryOnSTable(pParentQueryInfo, 0)) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + assert(pQueryInfo->numOfTables == 1); + + // for projection query, need to try next vnode + int32_t totalVnode = pTableMetaInfo->pMetricMeta->numOfVnodes; + if ((++pTableMetaInfo->vnodeIndex) < totalVnode) { + tscTrace("%p current vnode:%d exhausted, try next:%d. total vnode:%d. 
current numOfRes:%d", pSql, + pTableMetaInfo->vnodeIndex - 1, pTableMetaInfo->vnodeIndex, totalVnode, pRes->numOfTotal); + + pSql->cmd.command = TSDB_SQL_SELECT; + pSql->fp = tscJoinQueryCallback; + tscProcessSql(pSql); + + return; + } + } + + int32_t numOfTotal = pSupporter->pState->numOfTotal; + int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); + + if (finished >= numOfTotal) { + assert(finished == numOfTotal); + + if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { + tscTrace("%p sub:%p, numOfSub:%d, quit from further procedure due to other queries failure", pParentSql, tres, + pSupporter->subqueryIndex); + doQuitSubquery(pParentSql); + return; + } + + tscTrace("%p all subqueries retrieve ts complete, do ts block intersect", pParentSql); + + SJoinSubquerySupporter* p1 = pParentSql->pSubs[0]->param; + SJoinSubquerySupporter* p2 = pParentSql->pSubs[1]->param; + + TSKEY st, et; + + int64_t num = doTSBlockIntersect(pParentSql, p1, p2, &st, &et); + if (num <= 0) { // no result during ts intersect + tscTrace("%p free all sub SqlObj and quit", pParentSql); + doQuitSubquery(pParentSql); + } else { + updateQueryTimeRange(pParentQueryInfo, st, et); + tscLaunchSecondPhaseSubqueries(pParentSql); + } + } + } else { // failure of sub query + tscError("%p sub query failed, code:%d, index:%d", pSql, numOfRows, pSupporter->subqueryIndex); + pSupporter->pState->code = numOfRows; + + quitAllSubquery(pParentSql, pSupporter); + return; + } + + } else { // secondary stage retrieve, driven by taos_fetch_row or other functions + if (numOfRows < 0) { + pSupporter->pState->code = numOfRows; + tscError("%p retrieve failed, code:%d, index:%d", pSql, numOfRows, pSupporter->subqueryIndex); + } + + if (numOfRows >= 0) { + pSql->res.numOfTotal += pSql->res.numOfRows; + } + + if (tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0) && numOfRows == 0) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + assert(pQueryInfo->numOfTables == 1); + + // 
for projection query, need to try next vnode if current vnode is exhausted + if ((++pTableMetaInfo->vnodeIndex) < pTableMetaInfo->pMetricMeta->numOfVnodes) { + pSupporter->pState->numOfCompleted = 0; + pSupporter->pState->numOfTotal = 1; + + pSql->cmd.command = TSDB_SQL_SELECT; + pSql->fp = tscJoinQueryCallback; + tscProcessSql(pSql); + + return; + } + } + + int32_t numOfTotal = pSupporter->pState->numOfTotal; + int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); + + if (finished >= numOfTotal) { + assert(finished == numOfTotal); + tscTrace("%p all %d secondary subquery retrieves completed, global code:%d", tres, numOfTotal, + pParentSql->res.code); + + if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { + pParentSql->res.code = abs(pSupporter->pState->code); + freeSubqueryObj(pParentSql); + } + + tsem_post(&pParentSql->rspSem); + } else { + tscTrace("%p sub:%p completed, completed:%d, total:%d", pParentSql, tres, finished, numOfTotal); + } + } +} + +static SJoinSubquerySupporter* tscUpdateSubqueryStatus(SSqlObj* pSql, int32_t numOfFetch) { + int32_t notInvolved = 0; + SJoinSubquerySupporter* pSupporter = NULL; + SSubqueryState* pState = NULL; + + for(int32_t i = 0; i < pSql->numOfSubs; ++i) { + if (pSql->pSubs[i] == NULL) { + notInvolved++; + } else { + pSupporter = (SJoinSubquerySupporter*)pSql->pSubs[i]->param; + pState = pSupporter->pState; + } + } + + pState->numOfTotal = pSql->numOfSubs; + pState->numOfCompleted = pSql->numOfSubs - numOfFetch; + + return pSupporter; +} + +void tscFetchDatablockFromSubquery(SSqlObj* pSql) { + int32_t numOfFetch = 0; + assert(pSql->numOfSubs >= 1); + + for (int32_t i = 0; i < pSql->numOfSubs; ++i) { + if (pSql->pSubs[i] == NULL) { // this subquery does not need to involve in secondary query + continue; + } + + SSqlRes *pRes = &pSql->pSubs[i]->res; + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->pSubs[i]->cmd, 0); + + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + + if 
(tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { + if (pRes->row >= pRes->numOfRows && pTableMetaInfo->vnodeIndex < pTableMetaInfo->pMetricMeta->numOfVnodes && + (!tscHasReachLimitation(pQueryInfo, pRes))) { + numOfFetch++; + } + } else { + if (pRes->row >= pRes->numOfRows && (!tscHasReachLimitation(pQueryInfo, pRes))) { + numOfFetch++; + } + } + } + + if (numOfFetch <= 0) { + return; + } + + // TODO multi-vnode retrieve for projection query with limitation has bugs, since the global limiation is not handled + tscTrace("%p retrieve data from %d subqueries", pSql, numOfFetch); + + SJoinSubquerySupporter* pSupporter = tscUpdateSubqueryStatus(pSql, numOfFetch); + + for (int32_t i = 0; i < pSql->numOfSubs; ++i) { + SSqlObj* pSql1 = pSql->pSubs[i]; + if (pSql1 == NULL) { + continue; + } + + SSqlRes* pRes1 = &pSql1->res; + SSqlCmd* pCmd1 = &pSql1->cmd; + + pSupporter = (SJoinSubquerySupporter*)pSql1->param; + + // wait for all subqueries completed + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd1, 0); + assert(pRes1->numOfRows >= 0 && pQueryInfo->numOfTables == 1); + + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + + if (pRes1->row >= pRes1->numOfRows) { + tscTrace("%p subquery:%p retrieve data from vnode, subquery:%d, vnodeIndex:%d", pSql, pSql1, + pSupporter->subqueryIndex, pTableMetaInfo->vnodeIndex); + + tscResetForNextRetrieve(pRes1); + pSql1->fp = joinRetrieveCallback; + + if (pCmd1->command < TSDB_SQL_LOCAL) { + pCmd1->command = (pCmd1->command > TSDB_SQL_MGMT) ? 
TSDB_SQL_RETRIEVE : TSDB_SQL_FETCH; + } + + tscProcessSql(pSql1); + } + } + + // wait for all subquery completed + tsem_wait(&pSql->rspSem); + + // update the records for each subquery + for(int32_t i = 0; i < pSql->numOfSubs; ++i) { + if (pSql->pSubs[i] == NULL) { + continue; + } + + SSqlRes* pRes1 = &pSql->pSubs[i]->res; + pRes1->numOfTotalInCurrentClause += pRes1->numOfRows; + } +} + +// all subqueries return, set the result output index +void tscSetupOutputColumnIndex(SSqlObj* pSql) { + SSqlCmd* pCmd = &pSql->cmd; + SSqlRes* pRes = &pSql->res; + + tscTrace("%p all subquery response, retrieve data", pSql); + + if (pRes->pColumnIndex != NULL) { + return; // the column transfer support struct has been built + } + + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); + pRes->pColumnIndex = calloc(1, sizeof(SColumnIndex) * pQueryInfo->fieldsInfo.numOfOutputCols); + + for (int32_t i = 0; i < pQueryInfo->fieldsInfo.numOfOutputCols; ++i) { + SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i); + + int32_t tableIndexOfSub = -1; + for (int32_t j = 0; j < pQueryInfo->numOfTables; ++j) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, j); + if (pTableMetaInfo->pTableMeta->uid == pExpr->uid) { + tableIndexOfSub = j; + break; + } + } + + assert(tableIndexOfSub >= 0 && tableIndexOfSub < pQueryInfo->numOfTables); + + SSqlCmd* pSubCmd = &pSql->pSubs[tableIndexOfSub]->cmd; + SQueryInfo* pSubQueryInfo = tscGetQueryInfoDetail(pSubCmd, 0); + + for (int32_t k = 0; k < pSubQueryInfo->exprsInfo.numOfExprs; ++k) { + SSqlExpr* pSubExpr = tscSqlExprGet(pSubQueryInfo, k); + if (pExpr->functionId == pSubExpr->functionId && pExpr->colInfo.colId == pSubExpr->colInfo.colId) { + pRes->pColumnIndex[i] = (SColumnIndex){.tableIndex = tableIndexOfSub, .columnIndex = k}; + break; + } + } + } +} + +void tscJoinQueryCallback(void* param, TAOS_RES* tres, int code) { + SSqlObj* pSql = (SSqlObj*)tres; + // STableMetaInfo *pTableMetaInfo = 
tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); + + // int32_t idx = pSql->cmd.vnodeIdx; + + // SVnodeSidList *vnodeInfo = NULL; + // if (pTableMetaInfo->pMetricMeta != NULL) { + // vnodeInfo = tscGetVnodeSidList(pTableMetaInfo->pMetricMeta, idx - 1); + // } + + SJoinSubquerySupporter* pSupporter = (SJoinSubquerySupporter*)param; + + // if (atomic_add_fetch_32(pSupporter->numOfComplete, 1) >= + // pSupporter->numOfTotal) { + // SSqlObj *pParentObj = pSupporter->pObj; + // + // if ((pSql->cmd.type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) != 1) { + // int32_t num = 0; + // tscFetchDatablockFromSubquery(pParentObj); + // TSKEY* ts = tscGetQualifiedTSList(pParentObj, &num); + // + // if (num <= 0) { + // // no qualified result + // } + // + // tscLaunchSecondPhaseSubqueries(pSql, ts, num); + // } else { + + // } + // } else { + SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); + if ((pQueryInfo->type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) != TSDB_QUERY_TYPE_JOIN_SEC_STAGE) { + if (code != TSDB_CODE_SUCCESS) { // direct call joinRetrieveCallback and set the error code + joinRetrieveCallback(param, pSql, code); + } else { // first stage query, continue to retrieve data + pSql->fp = joinRetrieveCallback; + pSql->cmd.command = TSDB_SQL_FETCH; + tscProcessSql(pSql); + } + + } else { // second stage join subquery + SSqlObj* pParentSql = pSupporter->pObj; + + if (pSupporter->pState->code != TSDB_CODE_SUCCESS) { + tscError("%p abort query due to other subquery failure. 
code:%d, global code:%d", pSql, code, + pSupporter->pState->code); + quitAllSubquery(pParentSql, pSupporter); + + return; + } + + if (code != TSDB_CODE_SUCCESS) { + tscError("%p sub query failed, code:%d, set global code:%d, index:%d", pSql, code, code, + pSupporter->subqueryIndex); + pSupporter->pState->code = code; // todo set the informative code + + quitAllSubquery(pParentSql, pSupporter); + } else { + int32_t numOfTotal = pSupporter->pState->numOfTotal; + int32_t finished = atomic_add_fetch_32(&pSupporter->pState->numOfCompleted, 1); + + if (finished >= numOfTotal) { + assert(finished == numOfTotal); + + tscSetupOutputColumnIndex(pParentSql); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + + /** + * if the query is a continue query (vnodeIndex > 0 for projection query) for next vnode, do the retrieval of + * data instead of returning to its invoker + */ + if (pTableMetaInfo->vnodeIndex > 0 && tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0)) { + assert(pTableMetaInfo->vnodeIndex < pTableMetaInfo->pMetricMeta->numOfVnodes); + pSupporter->pState->numOfCompleted = 0; // reset the record value + + pSql->fp = joinRetrieveCallback; // continue retrieve data + pSql->cmd.command = TSDB_SQL_FETCH; + tscProcessSql(pSql); + } else { // first retrieve from vnode during the secondary stage sub-query + if (pParentSql->fp == NULL) { + tsem_wait(&pParentSql->emptyRspSem); + tsem_wait(&pParentSql->emptyRspSem); + + tsem_post(&pParentSql->rspSem); + } else { + // set the command flag must be after the semaphore been correctly set. 
+ // pPObj->cmd.command = TSDB_SQL_RETRIEVE_METRIC; + // if (pPObj->res.code == TSDB_CODE_SUCCESS) { + // (*pPObj->fp)(pPObj->param, pPObj, 0); + // } else { + // tscQueueAsyncRes(pPObj); + // } + assert(0); + } + } + } + } + } +} + +///////////////////////////////////////////////////////////////////////////////////////// +static void tscRetrieveDataRes(void *param, TAOS_RES *tres, int code); + +static SSqlObj *tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj *prevSqlObj); + +// todo merge with callback +int32_t tscLaunchJoinSubquery(SSqlObj *pSql, int16_t tableIndex, SJoinSubquerySupporter *pSupporter) { + SSqlCmd * pCmd = &pSql->cmd; + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); + + pSql->res.qhandle = 0x1; + pSql->res.numOfRows = 0; + + if (pSql->pSubs == NULL) { + pSql->pSubs = calloc(pSupporter->pState->numOfTotal, POINTER_BYTES); + if (pSql->pSubs == NULL) { + return TSDB_CODE_CLI_OUT_OF_MEMORY; + } + } + + SSqlObj *pNew = createSubqueryObj(pSql, tableIndex, tscJoinQueryCallback, pSupporter, NULL); + if (pNew == NULL) { + return TSDB_CODE_CLI_OUT_OF_MEMORY; + } + + pSql->pSubs[pSql->numOfSubs++] = pNew; + assert(pSql->numOfSubs <= pSupporter->pState->numOfTotal); + + if (QUERY_IS_JOIN_QUERY(pQueryInfo->type)) { + addGroupInfoForSubquery(pSql, pNew, 0, tableIndex); + + // refactor as one method + SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); + assert(pNewQueryInfo != NULL); + + tscColumnBaseInfoUpdateTableIndex(&pNewQueryInfo->colList, 0); + tscColumnBaseInfoCopy(&pSupporter->colList, &pNewQueryInfo->colList, 0); + + tscSqlExprCopy(&pSupporter->exprsInfo, &pNewQueryInfo->exprsInfo, pSupporter->uid, false); + tscFieldInfoCopyAll(&pSupporter->fieldsInfo, &pNewQueryInfo->fieldsInfo); + + tscTagCondCopy(&pSupporter->tagCond, &pNewQueryInfo->tagCond); + + pNew->cmd.numOfCols = 0; + pNewQueryInfo->intervalTime = 0; + memset(&pNewQueryInfo->limit, 0, sizeof(SLimitVal)); + + // backup the 
data and clear it in the sqlcmd object + pSupporter->groupbyExpr = pNewQueryInfo->groupbyExpr; + memset(&pNewQueryInfo->groupbyExpr, 0, sizeof(SSqlGroupbyExpr)); + + // this data needs to be transfer to support struct + pNewQueryInfo->fieldsInfo.numOfOutputCols = 0; + pNewQueryInfo->exprsInfo.numOfExprs = 0; + + // set the ts,tags that involved in join, as the output column of intermediate result + tscClearSubqueryInfo(&pNew->cmd); + + SSchema colSchema = {.type = TSDB_DATA_TYPE_BINARY, .bytes = 1}; + SColumnIndex index = {0, PRIMARYKEY_TIMESTAMP_COL_INDEX}; + + tscAddSpecialColumnForSelect(pNewQueryInfo, 0, TSDB_FUNC_TS_COMP, &index, &colSchema, TSDB_COL_NORMAL); + + // set the tags value for ts_comp function + SSqlExpr *pExpr = tscSqlExprGet(pNewQueryInfo, 0); + + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pNewQueryInfo, 0); + int16_t tagColIndex = tscGetJoinTagColIndexByUid(&pSupporter->tagCond, pTableMetaInfo->pTableMeta->uid); + + pExpr->param->i64Key = tagColIndex; + pExpr->numOfParams = 1; + + // add the filter tag column + for (int32_t i = 0; i < pSupporter->colList.numOfCols; ++i) { + SColumnBase *pColBase = &pSupporter->colList.pColList[i]; + if (pColBase->numOfFilters > 0) { // copy to the pNew->cmd.colList if it is filtered. 
+ tscColumnBaseCopy(&pNewQueryInfo->colList.pColList[pNewQueryInfo->colList.numOfCols], pColBase); + pNewQueryInfo->colList.numOfCols++; + } + } + + tscTrace("%p subquery:%p tableIndex:%d, vnodeIdx:%d, type:%d, transfer to ts_comp query to retrieve timestamps, " + "exprInfo:%d, colList:%d, fieldsInfo:%d, name:%s", + pSql, pNew, tableIndex, pTableMetaInfo->vnodeIndex, pNewQueryInfo->type, + pNewQueryInfo->exprsInfo.numOfExprs, pNewQueryInfo->colList.numOfCols, + pNewQueryInfo->fieldsInfo.numOfOutputCols, pNewQueryInfo->pTableMetaInfo[0]->name); + tscPrintSelectClause(pNew, 0); + + tscTrace("%p subquery:%p tableIndex:%d, vnodeIdx:%d, type:%d, transfer to ts_comp query to retrieve timestamps, " + "exprInfo:%d, colList:%d, fieldsInfo:%d, name:%s", + pSql, pNew, tableIndex, pTableMetaInfo->vnodeIndex, pNewQueryInfo->type, + pNewQueryInfo->exprsInfo.numOfExprs, pNewQueryInfo->colList.numOfCols, + pNewQueryInfo->fieldsInfo.numOfOutputCols, pNewQueryInfo->pTableMetaInfo[0]->name); + tscPrintSelectClause(pNew, 0); + } else { + SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); + pNewQueryInfo->type |= TSDB_QUERY_TYPE_SUBQUERY; + } + +#ifdef _DEBUG_VIEW + tscPrintSelectClause(pNew, 0); +#endif + + return tscProcessSql(pNew); +} + +// todo support async join query +int32_t tscHandleMasterJoinQuery(SSqlObj* pSql) { + SSqlCmd* pCmd = &pSql->cmd; + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); + + assert((pQueryInfo->type & TSDB_QUERY_TYPE_SUBQUERY) == 0); + + SSubqueryState *pState = calloc(1, sizeof(SSubqueryState)); + + pState->numOfTotal = pQueryInfo->numOfTables; + + for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { + SJoinSubquerySupporter *pSupporter = tscCreateJoinSupporter(pSql, pState, i); + + if (pSupporter == NULL) { // failed to create support struct, abort current query + tscError("%p tableIndex:%d, failed to allocate join support object, abort further query", pSql, i); + pState->numOfCompleted = 
pQueryInfo->numOfTables - i - 1; + pSql->res.code = TSDB_CODE_CLI_OUT_OF_MEMORY; + + return pSql->res.code; + } + + int32_t code = tscLaunchJoinSubquery(pSql, i, pSupporter); + if (code != TSDB_CODE_SUCCESS) { // failed to create subquery object, quit query + tscDestroyJoinSupporter(pSupporter); + pSql->res.code = TSDB_CODE_CLI_OUT_OF_MEMORY; + + break; + } + } + + tsem_post(&pSql->emptyRspSem); + tsem_wait(&pSql->rspSem); + + tsem_post(&pSql->emptyRspSem); + + if (pSql->numOfSubs <= 0) { + pSql->cmd.command = TSDB_SQL_RETRIEVE_EMPTY_RESULT; + } else { + pSql->cmd.command = TSDB_SQL_METRIC_JOIN_RETRIEVE; + } + + return TSDB_CODE_SUCCESS; +} + +static void doCleanupSubqueries(SSqlObj *pSql, int32_t numOfSubs, SSubqueryState* pState) { + assert(numOfSubs <= pSql->numOfSubs && numOfSubs >= 0 && pState != NULL); + + for(int32_t i = 0; i < numOfSubs; ++i) { + SSqlObj* pSub = pSql->pSubs[i]; + assert(pSub != NULL); + + SRetrieveSupport* pSupport = pSub->param; + + tfree(pSupport->localBuffer); + + pthread_mutex_unlock(&pSupport->queryMutex); + pthread_mutex_destroy(&pSupport->queryMutex); + + tfree(pSupport); + + tscFreeSqlObj(pSub); + } + + free(pState); +} + +int32_t tscHandleMasterSTableQuery(SSqlObj *pSql) { + SSqlRes *pRes = &pSql->res; + SSqlCmd *pCmd = &pSql->cmd; + + // pRes->code check only serves in launching metric sub-queries + if (pRes->code == TSDB_CODE_QUERY_CANCELLED) { + pCmd->command = TSDB_SQL_RETRIEVE_METRIC; // enable the abort of kill metric function. 
+ return pRes->code; + } + + tExtMemBuffer ** pMemoryBuf = NULL; + tOrderDescriptor *pDesc = NULL; + SColumnModel * pModel = NULL; + + pRes->qhandle = 1; // hack the qhandle check + + const uint32_t nBufferSize = (1 << 16); // 64KB + + SQueryInfo * pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + int32_t numOfSubQueries = pTableMetaInfo->pMetricMeta->numOfVnodes; + assert(numOfSubQueries > 0); + + int32_t ret = tscLocalReducerEnvCreate(pSql, &pMemoryBuf, &pDesc, &pModel, nBufferSize); + if (ret != 0) { + pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; + if (pSql->fp) { + tscQueueAsyncRes(pSql); + } + return pRes->code; + } + + pSql->pSubs = calloc(numOfSubQueries, POINTER_BYTES); + pSql->numOfSubs = numOfSubQueries; + + tscTrace("%p retrieved query data from %d vnode(s)", pSql, numOfSubQueries); + SSubqueryState *pState = calloc(1, sizeof(SSubqueryState)); + pState->numOfTotal = numOfSubQueries; + pRes->code = TSDB_CODE_SUCCESS; + + int32_t i = 0; + for (; i < numOfSubQueries; ++i) { + SRetrieveSupport *trs = (SRetrieveSupport *)calloc(1, sizeof(SRetrieveSupport)); + if (trs == NULL) { + tscError("%p failed to malloc buffer for SRetrieveSupport, orderOfSub:%d, reason:%s", pSql, i, strerror(errno)); + break; + } + + trs->pExtMemBuffer = pMemoryBuf; + trs->pOrderDescriptor = pDesc; + trs->pState = pState; + + trs->localBuffer = (tFilePage *)calloc(1, nBufferSize + sizeof(tFilePage)); + if (trs->localBuffer == NULL) { + tscError("%p failed to malloc buffer for local buffer, orderOfSub:%d, reason:%s", pSql, i, strerror(errno)); + tfree(trs); + break; + } + + trs->subqueryIndex = i; + trs->pParentSqlObj = pSql; + trs->pFinalColModel = pModel; + + pthread_mutexattr_t mutexattr = {0}; + pthread_mutexattr_settype(&mutexattr, PTHREAD_MUTEX_RECURSIVE_NP); + pthread_mutex_init(&trs->queryMutex, &mutexattr); + pthread_mutexattr_destroy(&mutexattr); + + SSqlObj *pNew = tscCreateSqlObjForSubquery(pSql, 
trs, NULL); + if (pNew == NULL) { + tscError("%p failed to malloc buffer for subObj, orderOfSub:%d, reason:%s", pSql, i, strerror(errno)); + tfree(trs->localBuffer); + tfree(trs); + break; + } + + // todo handle multi-vnode situation + if (pQueryInfo->tsBuf) { + SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); + pNewQueryInfo->tsBuf = tsBufClone(pQueryInfo->tsBuf); + } + + tscTrace("%p sub:%p create subquery success. orderOfSub:%d", pSql, pNew, trs->subqueryIndex); + } + + if (i < numOfSubQueries) { + tscError("%p failed to prepare subquery structure and launch subqueries", pSql); + pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; + + tscLocalReducerEnvDestroy(pMemoryBuf, pDesc, pModel, numOfSubQueries); + doCleanupSubqueries(pSql, i, pState); + return pRes->code; // free all allocated resource + } + + if (pRes->code == TSDB_CODE_QUERY_CANCELLED) { + tscLocalReducerEnvDestroy(pMemoryBuf, pDesc, pModel, numOfSubQueries); + doCleanupSubqueries(pSql, i, pState); + return pRes->code; + } + + for(int32_t j = 0; j < numOfSubQueries; ++j) { + SSqlObj* pSub = pSql->pSubs[j]; + SRetrieveSupport* pSupport = pSub->param; + + tscTrace("%p sub:%p launch subquery, orderOfSub:%d.", pSql, pSub, pSupport->subqueryIndex); + tscProcessSql(pSub); + } + + return TSDB_CODE_SUCCESS; +} + +static void tscFreeSubSqlObj(SRetrieveSupport *trsupport, SSqlObj *pSql) { + tscTrace("%p start to free subquery result", pSql); + + if (pSql->res.code == TSDB_CODE_SUCCESS) { + taos_free_result(pSql); + } + + tfree(trsupport->localBuffer); + + pthread_mutex_unlock(&trsupport->queryMutex); + pthread_mutex_destroy(&trsupport->queryMutex); + + tfree(trsupport); +} + +static void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows); + +static void tscAbortFurtherRetryRetrieval(SRetrieveSupport *trsupport, TAOS_RES *tres, int32_t errCode) { +// set no disk space error info +#ifdef WINDOWS + LPVOID lpMsgBuf; + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | 
FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, + GetLastError(), MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language + (LPTSTR)&lpMsgBuf, 0, NULL); + tscError("sub:%p failed to flush data to disk:reason:%s", tres, lpMsgBuf); + LocalFree(lpMsgBuf); +#else + char buf[256] = {0}; + strerror_r(errno, buf, 256); + tscError("sub:%p failed to flush data to disk:reason:%s", tres, buf); +#endif + + trsupport->pState->code = -errCode; + trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; + + pthread_mutex_unlock(&trsupport->queryMutex); + + tscRetrieveFromVnodeCallBack(trsupport, tres, trsupport->pState->code); +} + +static void tscHandleSubRetrievalError(SRetrieveSupport *trsupport, SSqlObj *pSql, int numOfRows) { + SSqlObj *pPObj = trsupport->pParentSqlObj; + int32_t subqueryIndex = trsupport->subqueryIndex; + + assert(pSql != NULL); + SSubqueryState* pState = trsupport->pState; + assert(pState->numOfCompleted < pState->numOfTotal && pState->numOfCompleted >= 0 && + pPObj->numOfSubs == pState->numOfTotal); + + /* retrieved in subquery failed. OR query cancelled in retrieve phase. */ + if (pState->code == TSDB_CODE_SUCCESS && pPObj->res.code != TSDB_CODE_SUCCESS) { + pState->code = -(int)pPObj->res.code; + + /* + * kill current sub-query connection, which may retrieve data from vnodes; + * Here we get: pPObj->res.code == TSDB_CODE_QUERY_CANCELLED + */ + pSql->res.numOfRows = 0; + trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; // disable retry efforts + tscTrace("%p query is cancelled, sub:%p, orderOfSub:%d abort retrieve, code:%d", trsupport->pParentSqlObj, pSql, + subqueryIndex, pState->code); + } + + if (numOfRows >= 0) { // current query is successful, but other sub query failed, still abort current query. 
+ tscTrace("%p sub:%p retrieve numOfRows:%d,orderOfSub:%d", pPObj, pSql, numOfRows, subqueryIndex); + tscError("%p sub:%p abort further retrieval due to other queries failure,orderOfSub:%d,code:%d", pPObj, pSql, + subqueryIndex, pState->code); + } else { + if (trsupport->numOfRetry++ < MAX_NUM_OF_SUBQUERY_RETRY && pState->code == TSDB_CODE_SUCCESS) { + /* + * current query failed, and the retry count is less than the available + * count, retry query clear previous retrieved data, then launch a new sub query + */ + tExtMemBufferClear(trsupport->pExtMemBuffer[subqueryIndex]); + + // clear local saved number of results + trsupport->localBuffer->numOfElems = 0; + pthread_mutex_unlock(&trsupport->queryMutex); + + tscTrace("%p sub:%p retrieve failed, code:%d, orderOfSub:%d, retry:%d", trsupport->pParentSqlObj, pSql, numOfRows, + subqueryIndex, trsupport->numOfRetry); + + SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSqlObj, trsupport, pSql); + if (pNew == NULL) { + tscError("%p sub:%p failed to create new subquery sqlobj due to out of memory, abort retry", + trsupport->pParentSqlObj, pSql); + + pState->code = TSDB_CODE_CLI_OUT_OF_MEMORY; + trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; + return; + } + + tscProcessSql(pNew); + return; + } else { // reach the maximum retry count, abort + atomic_val_compare_exchange_32(&pState->code, TSDB_CODE_SUCCESS, numOfRows); + tscError("%p sub:%p retrieve failed,code:%d,orderOfSub:%d failed.no more retry,set global code:%d", pPObj, pSql, + numOfRows, subqueryIndex, pState->code); + } + } + + int32_t numOfTotal = pState->numOfTotal; + + int32_t finished = atomic_add_fetch_32(&pState->numOfCompleted, 1); + if (finished < numOfTotal) { + tscTrace("%p sub:%p orderOfSub:%d freed, finished subqueries:%d", pPObj, pSql, trsupport->subqueryIndex, finished); + return tscFreeSubSqlObj(trsupport, pSql); + } + + // all subqueries are failed + tscError("%p retrieve from %d vnode(s) completed,code:%d.FAILED.", pPObj, 
pState->numOfTotal, pState->code); + pPObj->res.code = -(pState->code); + + // release allocated resource + tscLocalReducerEnvDestroy(trsupport->pExtMemBuffer, trsupport->pOrderDescriptor, trsupport->pFinalColModel, + pState->numOfTotal); + + tfree(trsupport->pState); + tscFreeSubSqlObj(trsupport, pSql); + + // in case of second stage join subquery, invoke its callback function instead of regular QueueAsyncRes + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pPObj->cmd, 0); + + if ((pQueryInfo->type & TSDB_QUERY_TYPE_JOIN_SEC_STAGE) == TSDB_QUERY_TYPE_JOIN_SEC_STAGE) { + (*pPObj->fp)(pPObj->param, pPObj, pPObj->res.code); + } else { // regular super table query + if (pPObj->res.code != TSDB_CODE_SUCCESS) { + tscQueueAsyncRes(pPObj); + } + } +} + +static void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { + SRetrieveSupport *trsupport = (SRetrieveSupport *)param; + int32_t idx = trsupport->subqueryIndex; + SSqlObj * pPObj = trsupport->pParentSqlObj; + tOrderDescriptor *pDesc = trsupport->pOrderDescriptor; + + SSqlObj *pSql = (SSqlObj *)tres; + if (pSql == NULL) { // sql object has been released in error process, return immediately + tscTrace("%p subquery has been released, idx:%d, abort", pPObj, idx); + return; + } + + SSubqueryState* pState = trsupport->pState; + assert(pState->numOfCompleted < pState->numOfTotal && pState->numOfCompleted >= 0 && + pPObj->numOfSubs == pState->numOfTotal); + + // query process and cancel query process may execute at the same time + pthread_mutex_lock(&trsupport->queryMutex); + + if (numOfRows < 0 || pState->code < 0 || pPObj->res.code != TSDB_CODE_SUCCESS) { + return tscHandleSubRetrievalError(trsupport, pSql, numOfRows); + } + + SSqlRes * pRes = &pSql->res; + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); + + STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + + SVnodeSidList *vnodeInfo = tscGetVnodeSidList(pTableMetaInfo->pMetricMeta, idx); + SVnodeDesc * pSvd = 
&vnodeInfo->vpeerDesc[vnodeInfo->index]; + + if (numOfRows > 0) { + assert(pRes->numOfRows == numOfRows); + int64_t num = atomic_add_fetch_64(&pState->numOfRetrievedRows, numOfRows); + + tscTrace("%p sub:%p retrieve numOfRows:%d totalNumOfRows:%d from ip:%u,vid:%d,orderOfSub:%d", pPObj, pSql, + pRes->numOfRows, pState->numOfRetrievedRows, pSvd->ip, pSvd->vnode, idx); + + if (num > tsMaxNumOfOrderedResults && tscIsProjectionQueryOnSTable(pQueryInfo, 0)) { + tscError("%p sub:%p num of OrderedRes is too many, max allowed:%" PRId64 " , current:%" PRId64, + pPObj, pSql, tsMaxNumOfOrderedResults, num); + tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_SORTED_RES_TOO_MANY); + return; + } + + +#ifdef _DEBUG_VIEW + printf("received data from vnode: %d rows\n", pRes->numOfRows); + SSrcColumnInfo colInfo[256] = {0}; + + tscGetSrcColumnInfo(colInfo, pQueryInfo); + tColModelDisplayEx(pDesc->pColumnModel, pRes->data, pRes->numOfRows, pRes->numOfRows, colInfo); +#endif + if (tsTotalTmpDirGB != 0 && tsAvailTmpDirGB < tsMinimalTmpDirGB) { + tscError("%p sub:%p client disk space remain %.3f GB, need at least %.3f GB, stop query", pPObj, pSql, + tsAvailTmpDirGB, tsMinimalTmpDirGB); + tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); + return; + } + + int32_t ret = saveToBuffer(trsupport->pExtMemBuffer[idx], pDesc, trsupport->localBuffer, pRes->data, + pRes->numOfRows, pQueryInfo->groupbyExpr.orderType); + if (ret < 0) { + // set no disk space error info, and abort retry + tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); + } else { + pthread_mutex_unlock(&trsupport->queryMutex); + taos_fetch_rows_a(tres, tscRetrieveFromVnodeCallBack, param); + } + + } else { // all data has been retrieved to client + /* data in from current vnode is stored in cache and disk */ + uint32_t numOfRowsFromVnode = trsupport->pExtMemBuffer[idx]->numOfTotalElems + trsupport->localBuffer->numOfElems; + tscTrace("%p sub:%p all data retrieved from 
ip:%u,vid:%d, numOfRows:%d, orderOfSub:%d", pPObj, pSql, pSvd->ip, + pSvd->vnode, numOfRowsFromVnode, idx); + + tColModelCompact(pDesc->pColumnModel, trsupport->localBuffer, pDesc->pColumnModel->capacity); + +#ifdef _DEBUG_VIEW + printf("%" PRIu64 " rows data flushed to disk:\n", trsupport->localBuffer->numOfElems); + SSrcColumnInfo colInfo[256] = {0}; + tscGetSrcColumnInfo(colInfo, pQueryInfo); + tColModelDisplayEx(pDesc->pColumnModel, trsupport->localBuffer->data, trsupport->localBuffer->numOfElems, + trsupport->localBuffer->numOfElems, colInfo); +#endif + + if (tsTotalTmpDirGB != 0 && tsAvailTmpDirGB < tsMinimalTmpDirGB) { + tscError("%p sub:%p client disk space remain %.3f GB, need at least %.3f GB, stop query", pPObj, pSql, + tsAvailTmpDirGB, tsMinimalTmpDirGB); + tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); + return; + } + + // each result for a vnode is ordered as an independant list, + // then used as an input of loser tree for disk-based merge routine + int32_t ret = tscFlushTmpBuffer(trsupport->pExtMemBuffer[idx], pDesc, trsupport->localBuffer, + pQueryInfo->groupbyExpr.orderType); + if (ret != 0) { + /* set no disk space error info, and abort retry */ + return tscAbortFurtherRetryRetrieval(trsupport, tres, TSDB_CODE_CLI_NO_DISKSPACE); + } + + // keep this value local variable, since the pState variable may be released by other threads, if atomic_add opertion + // increases the finished value up to pState->numOfTotal value, which means all subqueries are completed. + // In this case, the comparsion between finished value and released pState->numOfTotal is not safe. 
+ int32_t numOfTotal = pState->numOfTotal; + + int32_t finished = atomic_add_fetch_32(&pState->numOfCompleted, 1); + if (finished < numOfTotal) { + tscTrace("%p sub:%p orderOfSub:%d freed, finished subqueries:%d", pPObj, pSql, trsupport->subqueryIndex, finished); + return tscFreeSubSqlObj(trsupport, pSql); + } + + // all sub-queries are returned, start to local merge process + pDesc->pColumnModel->capacity = trsupport->pExtMemBuffer[idx]->numOfElemsPerPage; + + tscTrace("%p retrieve from %d vnodes completed.final NumOfRows:%d,start to build loser tree", pPObj, + pState->numOfTotal, pState->numOfRetrievedRows); + + SQueryInfo *pPQueryInfo = tscGetQueryInfoDetail(&pPObj->cmd, 0); + tscClearInterpInfo(pPQueryInfo); + + tscCreateLocalReducer(trsupport->pExtMemBuffer, pState->numOfTotal, pDesc, trsupport->pFinalColModel, + &pPObj->cmd, &pPObj->res); + tscTrace("%p build loser tree completed", pPObj); + + pPObj->res.precision = pSql->res.precision; + pPObj->res.numOfRows = 0; + pPObj->res.row = 0; + + // only free once + tfree(trsupport->pState); + tscFreeSubSqlObj(trsupport, pSql); + + // set the command flag must be after the semaphore been correctly set. + pPObj->cmd.command = TSDB_SQL_RETRIEVE_METRIC; + if (pPObj->res.code == TSDB_CODE_SUCCESS) { + (*pPObj->fp)(pPObj->param, pPObj, 0); + } else { + tscQueueAsyncRes(pPObj); + } + } +} + +static SSqlObj *tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj *prevSqlObj) { + const int32_t table_index = 0; + + SSqlObj *pNew = createSubqueryObj(pSql, table_index, tscRetrieveDataRes, trsupport, prevSqlObj); + if (pNew != NULL) { // the sub query of two-stage super table query + SQueryInfo *pQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); + pQueryInfo->type |= TSDB_QUERY_TYPE_STABLE_SUBQUERY; + + assert(pQueryInfo->numOfTables == 1 && pNew->cmd.numOfClause == 1); + + // launch subquery for each vnode, so the subquery index equals to the vnodeIndex. 
+ STableMetaInfo *pTableMetaInfo = tscGetMetaInfo(pQueryInfo, table_index); + pTableMetaInfo->vnodeIndex = trsupport->subqueryIndex; + + pSql->pSubs[trsupport->subqueryIndex] = pNew; + } + + return pNew; +} + +void tscRetrieveDataRes(void *param, TAOS_RES *tres, int code) { + SRetrieveSupport *trsupport = (SRetrieveSupport *)param; + + SSqlObj* pParentSql = trsupport->pParentSqlObj; + SSqlObj* pSql = (SSqlObj *)tres; + + STableMetaInfo *pTableMetaInfo = tscGetTableMetaInfoFromCmd(&pSql->cmd, 0, 0); + assert(pSql->cmd.numOfClause == 1 && pSql->cmd.pQueryInfo[0]->numOfTables == 1); + + int32_t idx = pTableMetaInfo->vnodeIndex; + + SVnodeSidList *vnodeInfo = NULL; + SVnodeDesc * pSvd = NULL; + if (pTableMetaInfo->pMetricMeta != NULL) { + vnodeInfo = tscGetVnodeSidList(pTableMetaInfo->pMetricMeta, idx); + pSvd = &vnodeInfo->vpeerDesc[vnodeInfo->index]; + } + + SSubqueryState* pState = trsupport->pState; + assert(pState->numOfCompleted < pState->numOfTotal && pState->numOfCompleted >= 0 && + pParentSql->numOfSubs == pState->numOfTotal); + + if (pParentSql->res.code != TSDB_CODE_SUCCESS || pState->code != TSDB_CODE_SUCCESS) { + // metric query is killed, Note: code must be less than 0 + trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; + if (pParentSql->res.code != TSDB_CODE_SUCCESS) { + code = -(int)(pParentSql->res.code); + } else { + code = pState->code; + } + tscTrace("%p query cancelled or failed, sub:%p, orderOfSub:%d abort, code:%d", pParentSql, pSql, + trsupport->subqueryIndex, code); + } + + /* + * if a query on a vnode is failed, all retrieve operations from vnode that occurs later + * than this one are actually not necessary, we simply call the tscRetrieveFromVnodeCallBack + * function to abort current and remain retrieve process. + * + * NOTE: threadsafe is required. 
+ */ + if (code != TSDB_CODE_SUCCESS) { + if (trsupport->numOfRetry++ >= MAX_NUM_OF_SUBQUERY_RETRY) { + tscTrace("%p sub:%p reach the max retry count,set global code:%d", pParentSql, pSql, code); + atomic_val_compare_exchange_32(&pState->code, 0, code); + } else { // does not reach the maximum retry count, go on + tscTrace("%p sub:%p failed code:%d, retry:%d", pParentSql, pSql, code, trsupport->numOfRetry); + + SSqlObj *pNew = tscCreateSqlObjForSubquery(pParentSql, trsupport, pSql); + if (pNew == NULL) { + tscError("%p sub:%p failed to create new subquery due to out of memory, abort retry, vid:%d, orderOfSub:%d", + trsupport->pParentSqlObj, pSql, pSvd != NULL ? pSvd->vnode : -1, trsupport->subqueryIndex); + + pState->code = -TSDB_CODE_CLI_OUT_OF_MEMORY; + trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; + } else { + SQueryInfo *pNewQueryInfo = tscGetQueryInfoDetail(&pNew->cmd, 0); + assert(pNewQueryInfo->pTableMetaInfo[0]->pTableMeta != NULL && pNewQueryInfo->pTableMetaInfo[0]->pMetricMeta != NULL); + tscProcessSql(pNew); + return; + } + } + } + + if (pState->code != TSDB_CODE_SUCCESS) { // failed, abort + if (vnodeInfo != NULL) { + tscTrace("%p sub:%p query failed,ip:%u,vid:%d,orderOfSub:%d,global code:%d", pParentSql, pSql, + vnodeInfo->vpeerDesc[vnodeInfo->index].ip, vnodeInfo->vpeerDesc[vnodeInfo->index].vnode, + trsupport->subqueryIndex, pState->code); + } else { + tscTrace("%p sub:%p query failed,orderOfSub:%d,global code:%d", pParentSql, pSql, + trsupport->subqueryIndex, pState->code); + } + + tscRetrieveFromVnodeCallBack(param, tres, pState->code); + } else { // success, proceed to retrieve data from dnode + if (vnodeInfo != NULL) { + tscTrace("%p sub:%p query complete,ip:%u,vid:%d,orderOfSub:%d,retrieve data", trsupport->pParentSqlObj, pSql, + vnodeInfo->vpeerDesc[vnodeInfo->index].ip, vnodeInfo->vpeerDesc[vnodeInfo->index].vnode, + trsupport->subqueryIndex); + } else { + tscTrace("%p sub:%p query complete, orderOfSub:%d,retrieve data", 
trsupport->pParentSqlObj, pSql, + trsupport->subqueryIndex); + } + + taos_fetch_rows_a(tres, tscRetrieveFromVnodeCallBack, param); + } +} + +static void multiVnodeInsertMerge(void* param, TAOS_RES* tres, int numOfRows) { + SInsertSupporter *pSupporter = (SInsertSupporter *)param; + SSqlObj* pParentObj = pSupporter->pSql; + SSqlCmd* pParentCmd = &pParentObj->cmd; + + SSubqueryState* pState = pSupporter->pState; + int32_t total = pState->numOfTotal; + + // increase the total inserted rows + if (numOfRows > 0) { + pParentObj->res.numOfRows += numOfRows; + } + + int32_t completed = atomic_add_fetch_32(&pState->numOfCompleted, 1); + if (completed < total) { + return; + } + + tscTrace("%p Async insertion completed, total inserted:%d", pParentObj, pParentObj->res.numOfRows); + + // release data block data + pParentCmd->pDataBlocks = tscDestroyBlockArrayList(pParentCmd->pDataBlocks); + + // restore user defined fp + pParentObj->fp = pParentObj->fetchFp; + + // all data has been sent to vnode, call user function + (*pParentObj->fp)(pParentObj->param, tres, numOfRows); +} + +int32_t tscHandleMultivnodeInsert(SSqlObj *pSql) { + SSqlRes *pRes = &pSql->res; + SSqlCmd *pCmd = &pSql->cmd; + + pRes->qhandle = 1; // hack the qhandle check + SDataBlockList *pDataBlocks = pCmd->pDataBlocks; + + pSql->pSubs = calloc(pDataBlocks->nSize, POINTER_BYTES); + pSql->numOfSubs = pDataBlocks->nSize; + assert(pDataBlocks->nSize > 0); + + tscTrace("%p submit data to %d vnode(s)", pSql, pDataBlocks->nSize); + SSubqueryState *pState = calloc(1, sizeof(SSubqueryState)); + pState->numOfTotal = pSql->numOfSubs; + + pRes->code = TSDB_CODE_SUCCESS; + + int32_t i = 0; + for (; i < pSql->numOfSubs; ++i) { + SInsertSupporter* pSupporter = calloc(1, sizeof(SInsertSupporter)); + pSupporter->pSql = pSql; + pSupporter->pState = pState; + + SSqlObj *pNew = createSubqueryObj(pSql, 0, multiVnodeInsertMerge, pSupporter, NULL); + if (pNew == NULL) { + tscError("%p failed to malloc buffer for subObj, orderOfSub:%d, 
reason:%s", pSql, i, strerror(errno)); + break; + } + + pSql->pSubs[i] = pNew; + tscTrace("%p sub:%p create subObj success. orderOfSub:%d", pSql, pNew, i); + } + + if (i < pSql->numOfSubs) { + tscError("%p failed to prepare subObj structure and launch sub-insertion", pSql); + pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; + return pRes->code; // free all allocated resource + } + + for (int32_t j = 0; j < pSql->numOfSubs; ++j) { + SSqlObj *pSub = pSql->pSubs[j]; + pSub->cmd.command = TSDB_SQL_INSERT; + int32_t code = tscCopyDataBlockToPayload(pSub, pDataBlocks->pData[j]); + + if (code != TSDB_CODE_SUCCESS) { + tscTrace("%p prepare submit data block failed in async insertion, vnodeIdx:%d, total:%d, code:%d", pSql, j, + pDataBlocks->nSize, code); + } + + tscTrace("%p sub:%p launch sub insert, orderOfSub:%d", pSql, pSub, j); + tscProcessSql(pSub); + } + + return TSDB_CODE_SUCCESS; +} diff --git a/src/client/src/tscSystem.c b/src/client/src/tscSystem.c index d7fe6f4ac8d6fe0462b065a2b2434d332b4ecfb7..c594e047152313cb0a1554ee6e2fe5b43e4541b1 100644 --- a/src/client/src/tscSystem.c +++ b/src/client/src/tscSystem.c @@ -34,11 +34,9 @@ void * pTscMgmtConn; void * pSlaveConn; void * tscCacheHandle; int32_t globalCode = 0; -int initialized = 0; int slaveIndex; void * tscTmr; void * tscQhandle; -void * tscConnCache; void * tscCheckDiskUsageTmr; int tsInsertHeadSize; @@ -151,7 +149,7 @@ void taos_init_imp() { } tscMgmtIpList.inUse = 0; - tscMgmtIpList.port = tsMgmtShellPort; + tscMgmtIpList.port = tsMnodeShellPort; tscMgmtIpList.numOfIps = 1; tscMgmtIpList.ip[0] = inet_addr(tsMasterIp); @@ -160,7 +158,7 @@ void taos_init_imp() { tscMgmtIpList.ip[1] = inet_addr(tsSecondIp); } - tscInitMsgs(); + tscInitMsgsFp(); slaveIndex = rand(); int queueSize = tsMaxVnodeConnections + tsMaxMeterConnections + tsMaxMgmtConnections + tsMaxMgmtConnections; @@ -186,13 +184,9 @@ void taos_init_imp() { refreshTime = refreshTime > 2 ? 2 : refreshTime; refreshTime = refreshTime < 1 ? 
1 : refreshTime; - if (tscCacheHandle == NULL) tscCacheHandle = taosInitDataCache(tsMaxMeterConnections / 2, tscTmr, refreshTime); + if (tscCacheHandle == NULL) tscCacheHandle = taosCacheInit(tscTmr, refreshTime); - tscConnCache = taosOpenConnCache(tsMaxMeterConnections * 2, NULL/*taosCloseRpcConn*/, tscTmr, tsShellActivityTimer * 1000); - - initialized = 1; tscTrace("client is initialized successfully"); - tsInsertHeadSize = tsRpcHeadSize + sizeof(SShellSubmitMsg); } void taos_init() { pthread_once(&tscinit, taos_init_imp); } diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 397ef2f0d03ed9fe4cc3c67b85dfdefcbdb0439a..aa4e5c93aa7a067c00cc4221e574a34e66c56c1f 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -16,14 +16,14 @@ #include "tscUtil.h" #include "hash.h" #include "os.h" +#include "qast.h" #include "taosmsg.h" -#include "tast.h" #include "tcache.h" #include "tkey.h" #include "tmd5.h" -#include "tscJoinProcess.h" #include "tscProfile.h" #include "tscSecondaryMerge.h" +#include "tscSubquery.h" #include "tschemautil.h" #include "tsclient.h" #include "ttimer.h" @@ -40,12 +40,12 @@ */ void tscGetMetricMetaCacheKey(SQueryInfo* pQueryInfo, char* str, uint64_t uid) { int32_t index = -1; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoByUid(pQueryInfo, uid, &index); + STableMetaInfo* pTableMetaInfo = tscGetMeterMetaInfoByUid(pQueryInfo, uid, &index); int32_t len = 0; char tagIdBuf[128] = {0}; - for (int32_t i = 0; i < pMeterMetaInfo->numOfTags; ++i) { - len += sprintf(&tagIdBuf[len], "%d,", pMeterMetaInfo->tagColumnIndex[i]); + for (int32_t i = 0; i < pTableMetaInfo->numOfTags; ++i) { + len += sprintf(&tagIdBuf[len], "%d,", pTableMetaInfo->tagColumnIndex[i]); } STagCond* pTagCond = &pQueryInfo->tagCond; @@ -64,7 +64,7 @@ void tscGetMetricMetaCacheKey(SQueryInfo* pQueryInfo, char* str, uint64_t uid) { size_t tbnameCondLen = pTagCond->tbnameCond.cond != NULL ? 
strlen(pTagCond->tbnameCond.cond) : 0; size_t redundantLen = 20; - size_t bufSize = strlen(pMeterMetaInfo->name) + tbnameCondLen + strlen(join) + strlen(tagIdBuf); + size_t bufSize = strlen(pTableMetaInfo->name) + tbnameCondLen + strlen(join) + strlen(tagIdBuf); if (cond != NULL && cond->cond != NULL) { bufSize += strlen(cond->cond); } @@ -72,7 +72,7 @@ void tscGetMetricMetaCacheKey(SQueryInfo* pQueryInfo, char* str, uint64_t uid) { bufSize = (size_t)((bufSize + redundantLen) * 1.5); char* tmp = calloc(1, bufSize); - int32_t keyLen = snprintf(tmp, bufSize, "%s,%s,%s,%d,%s,[%s],%d", pMeterMetaInfo->name, + int32_t keyLen = snprintf(tmp, bufSize, "%s,%s,%s,%d,%s,[%s],%d", pTableMetaInfo->name, ((cond != NULL && cond->cond != NULL) ? cond->cond : NULL), (tbnameCondLen > 0 ? pTagCond->tbnameCond.cond : NULL), pTagCond->relType, join, tagIdBuf, pQueryInfo->groupbyExpr.orderType); @@ -191,7 +191,7 @@ SVnodeSidList* tscGetVnodeSidList(SSuperTableMeta* pMetricmeta, int32_t vnodeIdx return (SVnodeSidList*)(pMetricmeta->list[vnodeIdx] + (char*)pMetricmeta); } -STableSidExtInfo* tscGetMeterSidInfo(SVnodeSidList* pSidList, int32_t idx) { +STableIdInfo* tscGetMeterSidInfo(SVnodeSidList* pSidList, int32_t idx) { if (pSidList == NULL) { tscError("illegal sidlist"); return 0; @@ -206,25 +206,25 @@ STableSidExtInfo* tscGetMeterSidInfo(SVnodeSidList* pSidList, int32_t idx) { assert(pSidList->pSidExtInfoList[idx] >= 0); - return (STableSidExtInfo*)(pSidList->pSidExtInfoList[idx] + (char*)pSidList); + return (STableIdInfo*)(pSidList->pSidExtInfoList[idx] + (char*)pSidList); } -bool tscIsTwoStageMergeMetricQuery(SQueryInfo* pQueryInfo, int32_t tableIndex) { +bool tscIsTwoStageSTableQuery(SQueryInfo* pQueryInfo, int32_t tableIndex) { if (pQueryInfo == NULL) { return false; } - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); - if (pMeterMetaInfo == NULL) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); + if 
(pTableMetaInfo == NULL) { return false; } // for select query super table, the metricmeta can not be null in any cases. - if (pQueryInfo->command == TSDB_SQL_SELECT && UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { - assert(pMeterMetaInfo->pMetricMeta != NULL); + if (pQueryInfo->command == TSDB_SQL_SELECT && UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { + assert(pTableMetaInfo->pMetricMeta != NULL); } - if (pMeterMetaInfo->pMetricMeta == NULL) { + if (pTableMetaInfo->pMetricMeta == NULL) { return false; } @@ -239,21 +239,21 @@ bool tscIsTwoStageMergeMetricQuery(SQueryInfo* pQueryInfo, int32_t tableIndex) { if (((pQueryInfo->type & TSDB_QUERY_TYPE_STABLE_SUBQUERY) != TSDB_QUERY_TYPE_STABLE_SUBQUERY) && pQueryInfo->command == TSDB_SQL_SELECT) { - return UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo); + return UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo); } return false; } bool tscIsProjectionQueryOnSTable(SQueryInfo* pQueryInfo, int32_t tableIndex) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, tableIndex); /* * In following cases, return false for non ordered project query on super table * 1. failed to get metermeta from server; 2. not a super table; 3. limitation is 0; * 4. 
show queries, instead of a select query */ - if (pMeterMetaInfo == NULL || !UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo) || + if (pTableMetaInfo == NULL || !UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo) || pQueryInfo->command == TSDB_SQL_RETRIEVE_EMPTY_RESULT || pQueryInfo->exprsInfo.numOfExprs == 0) { return false; } @@ -505,7 +505,7 @@ void tscDestroyDataBlock(STableDataBlocks* pDataBlock) { tfree(pDataBlock->params); // free the refcount for metermeta - taosRemoveDataFromCache(tscCacheHandle, (void**)&(pDataBlock->pMeterMeta), false); + taosCacheRelease(tscCacheHandle, (void**)&(pDataBlock->pTableMeta), false); tfree(pDataBlock); } @@ -579,21 +579,21 @@ void* tscDestroyBlockArrayList(SDataBlockList* pList) { int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, STableDataBlocks* pDataBlock) { SSqlCmd* pCmd = &pSql->cmd; - assert(pDataBlock->pMeterMeta != NULL); + assert(pDataBlock->pTableMeta != NULL); pCmd->numOfTablesInSubmit = pDataBlock->numOfTables; assert(pCmd->numOfClause == 1); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, 0); + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, 0); // set the correct metermeta object, the metermeta has been locked in pDataBlocks, so it must be in the cache - if (pMeterMetaInfo->pMeterMeta != pDataBlock->pMeterMeta) { - strcpy(pMeterMetaInfo->name, pDataBlock->tableId); - taosRemoveDataFromCache(tscCacheHandle, (void**)&(pMeterMetaInfo->pMeterMeta), false); + if (pTableMetaInfo->pTableMeta != pDataBlock->pTableMeta) { + strcpy(pTableMetaInfo->name, pDataBlock->tableId); + taosCacheRelease(tscCacheHandle, (void**)&(pTableMetaInfo->pTableMeta), false); - pMeterMetaInfo->pMeterMeta = taosTransferDataInCache(tscCacheHandle, (void**)&pDataBlock->pMeterMeta); + pTableMetaInfo->pTableMeta = taosCacheTransfer(tscCacheHandle, (void**)&pDataBlock->pTableMeta); } else { - assert(strncmp(pMeterMetaInfo->name, pDataBlock->tableId, tListLen(pDataBlock->tableId)) == 0); + 
assert(strncmp(pTableMetaInfo->name, pDataBlock->tableId, tListLen(pDataBlock->tableId)) == 0); } /* @@ -614,7 +614,7 @@ int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, STableDataBlocks* pDataBlock) { */ pCmd->payloadLen = pDataBlock->nAllocSize - tsRpcHeadSize; - assert(pCmd->allocSize >= pCmd->payloadLen + tsRpcHeadSize + 100); + assert(pCmd->allocSize >= pCmd->payloadLen + tsRpcHeadSize + 100 && pCmd->payloadLen > 0); return TSDB_CODE_SUCCESS; } @@ -637,7 +637,7 @@ void tscFreeUnusedDataBlocks(SDataBlockList* pList) { * @return */ int32_t tscCreateDataBlock(size_t initialSize, int32_t rowSize, int32_t startOffset, const char* name, - STableMeta* pMeterMeta, STableDataBlocks** dataBlocks) { + STableMeta* pTableMeta, STableDataBlocks** dataBlocks) { STableDataBlocks* dataBuf = (STableDataBlocks*)calloc(1, sizeof(STableDataBlocks)); if (dataBuf == NULL) { tscError("failed to allocated memory, reason:%s", strerror(errno)); @@ -665,30 +665,30 @@ int32_t tscCreateDataBlock(size_t initialSize, int32_t rowSize, int32_t startOff * due to operation such as drop database. So here we add the reference count directly instead of invoke * taosGetDataFromCache, which may return NULL value. 
*/ - dataBuf->pMeterMeta = taosGetDataFromExists(tscCacheHandle, pMeterMeta); - assert(initialSize > 0 && pMeterMeta != NULL && dataBuf->pMeterMeta != NULL); + dataBuf->pTableMeta = taosCacheAcquireByData(tscCacheHandle, pTableMeta); + assert(initialSize > 0 && pTableMeta != NULL && dataBuf->pTableMeta != NULL); *dataBlocks = dataBuf; return TSDB_CODE_SUCCESS; } int32_t tscGetDataBlockFromList(void* pHashList, SDataBlockList* pDataBlockList, int64_t id, int32_t size, - int32_t startOffset, int32_t rowSize, const char* tableId, STableMeta* pMeterMeta, + int32_t startOffset, int32_t rowSize, const char* tableId, STableMeta* pTableMeta, STableDataBlocks** dataBlocks) { *dataBlocks = NULL; - STableDataBlocks** t1 = (STableDataBlocks**)taosGetDataFromHashTable(pHashList, (const char*)&id, sizeof(id)); + STableDataBlocks** t1 = (STableDataBlocks**)taosHashGet(pHashList, (const char*)&id, sizeof(id)); if (t1 != NULL) { *dataBlocks = *t1; } if (*dataBlocks == NULL) { - int32_t ret = tscCreateDataBlock((size_t)size, rowSize, startOffset, tableId, pMeterMeta, dataBlocks); + int32_t ret = tscCreateDataBlock((size_t)size, rowSize, startOffset, tableId, pTableMeta, dataBlocks); if (ret != TSDB_CODE_SUCCESS) { return ret; } - taosAddToHashTable(pHashList, (const char*)&id, sizeof(int64_t), (char*)dataBlocks, POINTER_BYTES); + taosHashPut(pHashList, (const char*)&id, sizeof(int64_t), (char*)dataBlocks, POINTER_BYTES); tscAppendDataBlock(pDataBlockList, *dataBlocks); } @@ -698,19 +698,20 @@ int32_t tscGetDataBlockFromList(void* pHashList, SDataBlockList* pDataBlockList, int32_t tscMergeTableDataBlocks(SSqlObj* pSql, SDataBlockList* pTableDataBlockList) { SSqlCmd* pCmd = &pSql->cmd; - void* pVnodeDataBlockHashList = taosInitHashTable(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false); + void* pVnodeDataBlockHashList = taosHashInit(128, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false); SDataBlockList* pVnodeDataBlockList = tscCreateBlockArrayList(); for (int32_t 
i = 0; i < pTableDataBlockList->nSize; ++i) { STableDataBlocks* pOneTableBlock = pTableDataBlockList->pData[i]; STableDataBlocks* dataBuf = NULL; - int32_t ret = - tscGetDataBlockFromList(pVnodeDataBlockHashList, pVnodeDataBlockList, pOneTableBlock->vgid, TSDB_PAYLOAD_SIZE, - tsInsertHeadSize, 0, pOneTableBlock->tableId, pOneTableBlock->pMeterMeta, &dataBuf); + + int32_t ret = + tscGetDataBlockFromList(pVnodeDataBlockHashList, pVnodeDataBlockList, pOneTableBlock->vgId, TSDB_PAYLOAD_SIZE, + tsInsertHeadSize, 0, pOneTableBlock->tableId, pOneTableBlock->pTableMeta, &dataBuf); if (ret != TSDB_CODE_SUCCESS) { tscError("%p failed to prepare the data block buffer for merging table data, code:%d", pSql, ret); - taosCleanUpHashTable(pVnodeDataBlockHashList); + taosHashCleanup(pVnodeDataBlockHashList); tscDestroyBlockArrayList(pVnodeDataBlockList); return ret; } @@ -728,7 +729,7 @@ int32_t tscMergeTableDataBlocks(SSqlObj* pSql, SDataBlockList* pTableDataBlockLi } else { // failed to allocate memory, free already allocated memory and return error code tscError("%p failed to allocate memory for merging submit block, size:%d", pSql, dataBuf->nAllocSize); - taosCleanUpHashTable(pVnodeDataBlockHashList); + taosHashCleanup(pVnodeDataBlockHashList); tfree(dataBuf->pData); tscDestroyBlockArrayList(pVnodeDataBlockList); @@ -761,7 +762,7 @@ int32_t tscMergeTableDataBlocks(SSqlObj* pSql, SDataBlockList* pTableDataBlockLi pCmd->pDataBlocks = pVnodeDataBlockList; tscFreeUnusedDataBlocks(pCmd->pDataBlocks); - taosCleanUpHashTable(pVnodeDataBlockHashList); + taosHashCleanup(pVnodeDataBlockHashList); return TSDB_CODE_SUCCESS; } @@ -769,8 +770,10 @@ int32_t tscMergeTableDataBlocks(SSqlObj* pSql, SDataBlockList* pTableDataBlockLi void tscCloseTscObj(STscObj* pObj) { pObj->signature = NULL; SSqlObj* pSql = pObj->pSql; - globalCode = pSql->res.code; - + if (pSql) { + globalCode = pSql->res.code; + } + taosTmrStopA(&(pObj->pTimer)); tscFreeSqlObj(pSql); @@ -1059,8 +1062,8 @@ void 
tscClearFieldInfo(SFieldInfo* pFieldInfo) { for(int32_t i = 0; i < pFieldInfo->numOfOutputCols; ++i) { if (pFieldInfo->pExpr[i] != NULL) { - tSQLBinaryExprDestroy(&pFieldInfo->pExpr[i]->pBinExprInfo.pBinExpr, NULL); - tfree(pFieldInfo->pExpr[i]->pBinExprInfo.pReqColumns); + tSQLBinaryExprDestroy(&pFieldInfo->pExpr[i]->binExprInfo.pBinExpr, NULL); + tfree(pFieldInfo->pExpr[i]->binExprInfo.pReqColumns); tfree(pFieldInfo->pExpr[i]); } } @@ -1114,7 +1117,7 @@ SSqlExpr* tscSqlExprInsertEmpty(SQueryInfo* pQueryInfo, int32_t index, int16_t f SSqlExpr* tscSqlExprInsert(SQueryInfo* pQueryInfo, int32_t index, int16_t functionId, SColumnIndex* pColIndex, int16_t type, int16_t size, int16_t interSize) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, pColIndex->tableIndex); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, pColIndex->tableIndex); SSqlExprInfo* pExprInfo = &pQueryInfo->exprsInfo; @@ -1125,13 +1128,13 @@ SSqlExpr* tscSqlExprInsert(SQueryInfo* pQueryInfo, int32_t index, int16_t functi pExprInfo->pExprs[index] = pExpr; pExpr->functionId = functionId; - int16_t numOfCols = pMeterMetaInfo->pMeterMeta->numOfColumns; + int16_t numOfCols = tscGetNumOfColumns(pTableMetaInfo->pTableMeta); // set the correct column index if (pColIndex->columnIndex == TSDB_TBNAME_COLUMN_INDEX) { pExpr->colInfo.colId = TSDB_TBNAME_COLUMN_INDEX; } else { - SSchema* pSchema = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, pColIndex->columnIndex); + SSchema* pSchema = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, pColIndex->columnIndex); pExpr->colInfo.colId = pSchema->colId; } @@ -1151,7 +1154,7 @@ SSqlExpr* tscSqlExprInsert(SQueryInfo* pQueryInfo, int32_t index, int16_t functi pExpr->resType = type; pExpr->resBytes = size; pExpr->interResBytes = interSize; - pExpr->uid = pMeterMetaInfo->pMeterMeta->uid; + pExpr->uid = pTableMetaInfo->pTableMeta->uid; pExprInfo->numOfExprs++; return pExpr; @@ -1159,7 +1162,7 @@ SSqlExpr* 
tscSqlExprInsert(SQueryInfo* pQueryInfo, int32_t index, int16_t functi SSqlExpr* tscSqlExprUpdate(SQueryInfo* pQueryInfo, int32_t index, int16_t functionId, int16_t srcColumnIndex, int16_t type, int16_t size) { - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); SSqlExprInfo* pExprInfo = &pQueryInfo->exprsInfo; if (index > pExprInfo->numOfExprs) { return NULL; @@ -1170,7 +1173,7 @@ SSqlExpr* tscSqlExprUpdate(SQueryInfo* pQueryInfo, int32_t index, int16_t functi pExpr->functionId = functionId; pExpr->colInfo.colIdx = srcColumnIndex; - pExpr->colInfo.colId = tsGetColumnSchema(pMeterMetaInfo->pMeterMeta, srcColumnIndex)->colId; + pExpr->colInfo.colId = tscGetTableColumnSchema(pTableMetaInfo->pTableMeta, srcColumnIndex)->colId; pExpr->resType = type; pExpr->resBytes = size; @@ -1571,17 +1574,19 @@ void tscIncStreamExecutionCount(void* pStream) { ps->num += 1; } -bool tscValidateColumnId(SMeterMetaInfo* pMeterMetaInfo, int32_t colId) { - if (pMeterMetaInfo->pMeterMeta == NULL) { +bool tscValidateColumnId(STableMetaInfo* pTableMetaInfo, int32_t colId) { + if (pTableMetaInfo->pTableMeta == NULL) { return false; } - if (colId == -1 && UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + if (colId == -1 && UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { return true; } - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); - int32_t numOfTotal = pMeterMetaInfo->pMeterMeta->numOfTags + pMeterMetaInfo->pMeterMeta->numOfColumns; + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); + STableComInfo tinfo = tscGetTableInfo(pTableMetaInfo->pTableMeta); + + int32_t numOfTotal = tinfo.numOfTags + tinfo.numOfColumns; for (int32_t i = 0; i < numOfTotal; ++i) { if (pSchema[i].colId == colId) { @@ -1625,16 +1630,16 @@ void tscTagCondRelease(STagCond* pCond) { } void tscGetSrcColumnInfo(SSrcColumnInfo* pColInfo, SQueryInfo* pQueryInfo) { - SMeterMetaInfo* pMeterMetaInfo = 
tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - SSchema* pSchema = tsGetSchema(pMeterMetaInfo->pMeterMeta); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + SSchema* pSchema = tscGetTableSchema(pTableMetaInfo->pTableMeta); for (int32_t i = 0; i < pQueryInfo->exprsInfo.numOfExprs; ++i) { SSqlExpr* pExpr = tscSqlExprGet(pQueryInfo, i); pColInfo[i].functionId = pExpr->functionId; if (TSDB_COL_IS_TAG(pExpr->colInfo.flag)) { - SSchema* pTagSchema = tsGetTagSchema(pMeterMetaInfo->pMeterMeta); - int16_t actualTagIndex = pMeterMetaInfo->tagColumnIndex[pExpr->colInfo.colIdx]; + SSchema* pTagSchema = tscGetTableTagSchema(pTableMetaInfo->pTableMeta); + int16_t actualTagIndex = pTableMetaInfo->tagColumnIndex[pExpr->colInfo.colIdx]; pColInfo[i].type = (actualTagIndex != -1) ? pTagSchema[actualTagIndex].type : TSDB_DATA_TYPE_BINARY; } else { @@ -1721,10 +1726,10 @@ bool tscShouldFreeAsyncSqlObj(SSqlObj* pSql) { SDataBlockList* pDataBlocks = pCmd->pDataBlocks; SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(&pSql->cmd, 0); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); assert(pQueryInfo->numOfTables == 1 || pQueryInfo->numOfTables == 2); - if (pDataBlocks == NULL || pMeterMetaInfo->vnodeIndex >= pDataBlocks->nSize) { + if (pDataBlocks == NULL || pTableMetaInfo->vnodeIndex >= pDataBlocks->nSize) { tscTrace("%p object should be release since all data blocks have been submit", pSql); return true; } else { @@ -1743,7 +1748,7 @@ bool tscShouldFreeAsyncSqlObj(SSqlObj* pSql) { * @param tableIndex denote the table index for join query, where more than one table exists * @return */ -SMeterMetaInfo* tscGetMeterMetaInfo(SSqlCmd* pCmd, int32_t clauseIndex, int32_t tableIndex) { +STableMetaInfo* tscGetTableMetaInfoFromCmd(SSqlCmd* pCmd, int32_t clauseIndex, int32_t tableIndex) { if (pCmd == NULL || pCmd->numOfClause == 0) { return NULL; } @@ -1751,20 +1756,20 @@ 
SMeterMetaInfo* tscGetMeterMetaInfo(SSqlCmd* pCmd, int32_t clauseIndex, int32_t assert(clauseIndex >= 0 && clauseIndex < pCmd->numOfClause); SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, clauseIndex); - return tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, tableIndex); + return tscGetMetaInfo(pQueryInfo, tableIndex); } -SMeterMetaInfo* tscGetMeterMetaInfoFromQueryInfo(SQueryInfo* pQueryInfo, int32_t tableIndex) { +STableMetaInfo* tscGetMetaInfo(SQueryInfo* pQueryInfo, int32_t tableIndex) { assert(pQueryInfo != NULL); - if (pQueryInfo->pMeterInfo == NULL) { + if (pQueryInfo->pTableMetaInfo == NULL) { assert(pQueryInfo->numOfTables == 0); return NULL; } - assert(tableIndex >= 0 && tableIndex <= pQueryInfo->numOfTables && pQueryInfo->pMeterInfo != NULL); + assert(tableIndex >= 0 && tableIndex <= pQueryInfo->numOfTables && pQueryInfo->pTableMetaInfo != NULL); - return pQueryInfo->pMeterInfo[tableIndex]; + return pQueryInfo->pTableMetaInfo[tableIndex]; } SQueryInfo* tscGetQueryInfoDetail(SSqlCmd* pCmd, int32_t subClauseIndex) { @@ -1793,11 +1798,11 @@ int32_t tscGetQueryInfoDetailSafely(SSqlCmd* pCmd, int32_t subClauseIndex, SQuer return TSDB_CODE_SUCCESS; } -SMeterMetaInfo* tscGetMeterMetaInfoByUid(SQueryInfo* pQueryInfo, uint64_t uid, int32_t* index) { +STableMetaInfo* tscGetMeterMetaInfoByUid(SQueryInfo* pQueryInfo, uint64_t uid, int32_t* index) { int32_t k = -1; for (int32_t i = 0; i < pQueryInfo->numOfTables; ++i) { - if (pQueryInfo->pMeterInfo[i]->pMeterMeta->uid == uid) { + if (pQueryInfo->pTableMetaInfo[i]->pTableMeta->uid == uid) { k = i; break; } @@ -1808,7 +1813,7 @@ SMeterMetaInfo* tscGetMeterMetaInfoByUid(SQueryInfo* pQueryInfo, uint64_t uid, i } assert(k != -1); - return tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, k); + return tscGetMetaInfo(pQueryInfo, k); } int32_t tscAddSubqueryInfo(SSqlCmd* pCmd) { @@ -1870,37 +1875,37 @@ void tscFreeSubqueryInfo(SSqlCmd* pCmd) { tfree(pCmd->pQueryInfo); } -SMeterMetaInfo* tscAddMeterMetaInfo(SQueryInfo* 
pQueryInfo, const char* name, STableMeta* pMeterMeta, +STableMetaInfo* tscAddMeterMetaInfo(SQueryInfo* pQueryInfo, const char* name, STableMeta* pTableMeta, SSuperTableMeta* pMetricMeta, int16_t numOfTags, int16_t* tags) { - void* pAlloc = realloc(pQueryInfo->pMeterInfo, (pQueryInfo->numOfTables + 1) * POINTER_BYTES); + void* pAlloc = realloc(pQueryInfo->pTableMetaInfo, (pQueryInfo->numOfTables + 1) * POINTER_BYTES); if (pAlloc == NULL) { return NULL; } - pQueryInfo->pMeterInfo = pAlloc; - pQueryInfo->pMeterInfo[pQueryInfo->numOfTables] = calloc(1, sizeof(SMeterMetaInfo)); + pQueryInfo->pTableMetaInfo = pAlloc; + pQueryInfo->pTableMetaInfo[pQueryInfo->numOfTables] = calloc(1, sizeof(STableMetaInfo)); - SMeterMetaInfo* pMeterMetaInfo = pQueryInfo->pMeterInfo[pQueryInfo->numOfTables]; - assert(pMeterMetaInfo != NULL); + STableMetaInfo* pTableMetaInfo = pQueryInfo->pTableMetaInfo[pQueryInfo->numOfTables]; + assert(pTableMetaInfo != NULL); if (name != NULL) { assert(strlen(name) <= TSDB_TABLE_ID_LEN); - strcpy(pMeterMetaInfo->name, name); + strcpy(pTableMetaInfo->name, name); } - pMeterMetaInfo->pMeterMeta = pMeterMeta; - pMeterMetaInfo->pMetricMeta = pMetricMeta; - pMeterMetaInfo->numOfTags = numOfTags; + pTableMetaInfo->pTableMeta = pTableMeta; + pTableMetaInfo->pMetricMeta = pMetricMeta; + pTableMetaInfo->numOfTags = numOfTags; if (tags != NULL) { - memcpy(pMeterMetaInfo->tagColumnIndex, tags, sizeof(pMeterMetaInfo->tagColumnIndex[0]) * numOfTags); + memcpy(pTableMetaInfo->tagColumnIndex, tags, sizeof(pTableMetaInfo->tagColumnIndex[0]) * numOfTags); } pQueryInfo->numOfTables += 1; - return pMeterMetaInfo; + return pTableMetaInfo; } -SMeterMetaInfo* tscAddEmptyMeterMetaInfo(SQueryInfo* pQueryInfo) { +STableMetaInfo* tscAddEmptyMetaInfo(SQueryInfo* pQueryInfo) { return tscAddMeterMetaInfo(pQueryInfo, NULL, NULL, NULL, 0, NULL); } @@ -1909,14 +1914,14 @@ void doRemoveMeterMetaInfo(SQueryInfo* pQueryInfo, int32_t index, bool removeFro return; } - SMeterMetaInfo* 
pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, index); + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, index); - tscClearMeterMetaInfo(pMeterMetaInfo, removeFromCache); - free(pMeterMetaInfo); + tscClearMeterMetaInfo(pTableMetaInfo, removeFromCache); + free(pTableMetaInfo); int32_t after = pQueryInfo->numOfTables - index - 1; if (after > 0) { - memmove(&pQueryInfo->pMeterInfo[index], &pQueryInfo->pMeterInfo[index + 1], after * POINTER_BYTES); + memmove(&pQueryInfo->pTableMetaInfo[index], &pQueryInfo->pTableMetaInfo[index + 1], after * POINTER_BYTES); } pQueryInfo->numOfTables -= 1; @@ -1930,16 +1935,16 @@ void tscRemoveAllMeterMetaInfo(SQueryInfo* pQueryInfo, const char* address, bool doRemoveMeterMetaInfo(pQueryInfo, --index, removeFromCache); } - tfree(pQueryInfo->pMeterInfo); + tfree(pQueryInfo->pTableMetaInfo); } -void tscClearMeterMetaInfo(SMeterMetaInfo* pMeterMetaInfo, bool removeFromCache) { - if (pMeterMetaInfo == NULL) { +void tscClearMeterMetaInfo(STableMetaInfo* pTableMetaInfo, bool removeFromCache) { + if (pTableMetaInfo == NULL) { return; } - taosRemoveDataFromCache(tscCacheHandle, (void**)&(pMeterMetaInfo->pMeterMeta), removeFromCache); - taosRemoveDataFromCache(tscCacheHandle, (void**)&(pMeterMetaInfo->pMetricMeta), removeFromCache); + taosCacheRelease(tscCacheHandle, (void**)&(pTableMetaInfo->pTableMeta), removeFromCache); + taosCacheRelease(tscCacheHandle, (void**)&(pTableMetaInfo->pMetricMeta), removeFromCache); } void tscResetForNextRetrieve(SSqlRes* pRes) { @@ -1953,11 +1958,11 @@ void tscResetForNextRetrieve(SSqlRes* pRes) { SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void* param, SSqlObj* pPrevSql) { SSqlCmd* pCmd = &pSql->cmd; - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfo(pCmd, pCmd->clauseIndex, tableIndex); + STableMetaInfo* pTableMetaInfo = tscGetTableMetaInfoFromCmd(pCmd, pCmd->clauseIndex, tableIndex); SSqlObj* pNew = (SSqlObj*)calloc(1, sizeof(SSqlObj)); if (pNew == 
NULL) { - tscError("%p new subquery failed, tableIndex:%d, vnodeIndex:%d", pSql, tableIndex, pMeterMetaInfo->vnodeIndex); + tscError("%p new subquery failed, tableIndex:%d, vnodeIndex:%d", pSql, tableIndex, pTableMetaInfo->vnodeIndex); return NULL; } @@ -1966,7 +1971,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void pNew->sqlstr = strdup(pSql->sqlstr); if (pNew->sqlstr == NULL) { - tscError("%p new subquery failed, tableIndex:%d, vnodeIndex:%d", pSql, tableIndex, pMeterMetaInfo->vnodeIndex); + tscError("%p new subquery failed, tableIndex:%d, vnodeIndex:%d", pSql, tableIndex, pTableMetaInfo->vnodeIndex); free(pNew); return NULL; @@ -1995,7 +2000,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void memset(&pNewQueryInfo->colList, 0, sizeof(pNewQueryInfo->colList)); memset(&pNewQueryInfo->fieldsInfo, 0, sizeof(SFieldInfo)); - pNewQueryInfo->pMeterInfo = NULL; + pNewQueryInfo->pTableMetaInfo = NULL; pNewQueryInfo->defaultVal = NULL; pNewQueryInfo->numOfTables = 0; pNewQueryInfo->tsBuf = NULL; @@ -2008,7 +2013,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void } if (tscAllocPayload(&pNew->cmd, TSDB_DEFAULT_PAYLOAD_SIZE) != TSDB_CODE_SUCCESS) { - tscError("%p new subquery failed, tableIndex:%d, vnodeIndex:%d", pSql, tableIndex, pMeterMetaInfo->vnodeIndex); + tscError("%p new subquery failed, tableIndex:%d, vnodeIndex:%d", pSql, tableIndex, pTableMetaInfo->vnodeIndex); tscFreeSqlObj(pNew); return NULL; } @@ -2023,7 +2028,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void pNewQueryInfo->type |= TSDB_QUERY_TYPE_SUBQUERY; // it must be the subquery } - uint64_t uid = pMeterMetaInfo->pMeterMeta->uid; + uint64_t uid = pTableMetaInfo->pTableMeta->uid; tscSqlExprCopy(&pNewQueryInfo->exprsInfo, &pQueryInfo->exprsInfo, uid, true); int32_t numOfOutputCols = pNewQueryInfo->exprsInfo.numOfExprs; @@ -2065,34 +2070,34 @@ SSqlObj* 
createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void printf("the metricmeta key is:%s\n", key); #endif - char* name = pMeterMetaInfo->name; - SMeterMetaInfo* pFinalInfo = NULL; + char* name = pTableMetaInfo->name; + STableMetaInfo* pFinalInfo = NULL; if (pPrevSql == NULL) { - STableMeta* pMeterMeta = taosGetDataFromCache(tscCacheHandle, name); - SSuperTableMeta* pMetricMeta = taosGetDataFromCache(tscCacheHandle, key); + STableMeta* pTableMeta = taosCacheAcquireByName(tscCacheHandle, name); + SSuperTableMeta* pMetricMeta = taosCacheAcquireByName(tscCacheHandle, key); - pFinalInfo = tscAddMeterMetaInfo(pNewQueryInfo, name, pMeterMeta, pMetricMeta, pMeterMetaInfo->numOfTags, - pMeterMetaInfo->tagColumnIndex); - } else { // transfer the ownership of pMeterMeta/pMetricMeta to the newly create sql object. - SMeterMetaInfo* pPrevInfo = tscGetMeterMetaInfo(&pPrevSql->cmd, pPrevSql->cmd.clauseIndex, 0); + pFinalInfo = tscAddMeterMetaInfo(pNewQueryInfo, name, pTableMeta, pMetricMeta, pTableMetaInfo->numOfTags, + pTableMetaInfo->tagColumnIndex); + } else { // transfer the ownership of pTableMeta/pMetricMeta to the newly create sql object. 
+ STableMetaInfo* pPrevInfo = tscGetTableMetaInfoFromCmd(&pPrevSql->cmd, pPrevSql->cmd.clauseIndex, 0); - STableMeta* pPrevMeterMeta = taosTransferDataInCache(tscCacheHandle, (void**)&pPrevInfo->pMeterMeta); - SSuperTableMeta* pPrevMetricMeta = taosTransferDataInCache(tscCacheHandle, (void**)&pPrevInfo->pMetricMeta); + STableMeta* pPrevMeterMeta = taosCacheTransfer(tscCacheHandle, (void**)&pPrevInfo->pTableMeta); + SSuperTableMeta* pPrevMetricMeta = taosCacheTransfer(tscCacheHandle, (void**)&pPrevInfo->pMetricMeta); - pFinalInfo = tscAddMeterMetaInfo(pNewQueryInfo, name, pPrevMeterMeta, pPrevMetricMeta, pMeterMetaInfo->numOfTags, - pMeterMetaInfo->tagColumnIndex); + pFinalInfo = tscAddMeterMetaInfo(pNewQueryInfo, name, pPrevMeterMeta, pPrevMetricMeta, pTableMetaInfo->numOfTags, + pTableMetaInfo->tagColumnIndex); } - assert(pFinalInfo->pMeterMeta != NULL && pNewQueryInfo->numOfTables == 1); - if (UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo)) { + assert(pFinalInfo->pTableMeta != NULL && pNewQueryInfo->numOfTables == 1); + if (UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo)) { assert(pFinalInfo->pMetricMeta != NULL); } tscTrace( "%p new subquery: %p, tableIndex:%d, vnodeIdx:%d, type:%d, exprInfo:%d, colList:%d," "fieldInfo:%d, name:%s, qrang:%" PRId64 " - %" PRId64 " order:%d, limit:%" PRId64, - pSql, pNew, tableIndex, pMeterMetaInfo->vnodeIndex, pNewQueryInfo->type, pNewQueryInfo->exprsInfo.numOfExprs, + pSql, pNew, tableIndex, pTableMetaInfo->vnodeIndex, pNewQueryInfo->type, pNewQueryInfo->exprsInfo.numOfExprs, pNewQueryInfo->colList.numOfCols, pNewQueryInfo->fieldsInfo.numOfOutputCols, pFinalInfo->name, pNewQueryInfo->stime, pNewQueryInfo->etime, pNewQueryInfo->order.order, pNewQueryInfo->limit.limit); @@ -2103,7 +2108,6 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void void tscDoQuery(SSqlObj* pSql) { SSqlCmd* pCmd = &pSql->cmd; - void* fp = pSql->fp; pSql->res.code = TSDB_CODE_SUCCESS; @@ -2119,7 +2123,6 @@ void tscDoQuery(SSqlObj* pSql) { 
} else { // pSql may be released in this function if it is a async insertion. tscProcessSql(pSql); - if (NULL == fp) tscProcessMultiVnodesInsert(pSql); } } } @@ -2187,14 +2190,14 @@ bool hasMoreVnodesToTry(SSqlObj* pSql) { SQueryInfo* pQueryInfo = tscGetQueryInfoDetail(pCmd, pCmd->clauseIndex); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - if (!UTIL_METER_IS_SUPERTABLE(pMeterMetaInfo) || (pMeterMetaInfo->pMetricMeta == NULL)) { + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + if (!UTIL_TABLE_IS_SUPERTABLE(pTableMetaInfo) || (pTableMetaInfo->pMetricMeta == NULL)) { return false; } - int32_t totalVnode = pMeterMetaInfo->pMetricMeta->numOfVnodes; + int32_t totalVnode = pTableMetaInfo->pMetricMeta->numOfVnodes; return pRes->numOfRows == 0 && tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0) && - (!tscHasReachLimitation(pQueryInfo, pRes)) && (pMeterMetaInfo->vnodeIndex < totalVnode - 1); + (!tscHasReachLimitation(pQueryInfo, pRes)) && (pTableMetaInfo->vnodeIndex < totalVnode - 1); } void tscTryQueryNextVnode(SSqlObj* pSql, __async_cb_func_t fp) { @@ -2209,12 +2212,12 @@ void tscTryQueryNextVnode(SSqlObj* pSql, __async_cb_func_t fp) { */ assert(pRes->numOfRows == 0 && tscNonOrderedProjectionQueryOnSTable(pQueryInfo, 0) && !tscHasReachLimitation(pQueryInfo, pRes)); - SMeterMetaInfo* pMeterMetaInfo = tscGetMeterMetaInfoFromQueryInfo(pQueryInfo, 0); - int32_t totalVnode = pMeterMetaInfo->pMetricMeta->numOfVnodes; + STableMetaInfo* pTableMetaInfo = tscGetMetaInfo(pQueryInfo, 0); + int32_t totalVnode = pTableMetaInfo->pMetricMeta->numOfVnodes; - while (++pMeterMetaInfo->vnodeIndex < totalVnode) { + while (++pTableMetaInfo->vnodeIndex < totalVnode) { tscTrace("%p current vnode:%d exhausted, try next:%d. total vnode:%d. 
current numOfRes:%d", pSql, - pMeterMetaInfo->vnodeIndex - 1, pMeterMetaInfo->vnodeIndex, totalVnode, pRes->numOfTotalInCurrentClause); + pTableMetaInfo->vnodeIndex - 1, pTableMetaInfo->vnodeIndex, totalVnode, pRes->numOfTotalInCurrentClause); /* * update the limit and offset value for the query on the next vnode, @@ -2233,7 +2236,7 @@ void tscTryQueryNextVnode(SSqlObj* pSql, __async_cb_func_t fp) { assert((pRes->offset >= 0 && pRes->numOfRows == 0) || (pRes->offset == 0 && pRes->numOfRows >= 0)); tscTrace("%p new query to next vnode, vnode index:%d, limit:%" PRId64 ", offset:%" PRId64 ", glimit:%" PRId64, pSql, - pMeterMetaInfo->vnodeIndex, pQueryInfo->limit.limit, pQueryInfo->limit.offset, pQueryInfo->clauseLimit); + pTableMetaInfo->vnodeIndex, pQueryInfo->limit.limit, pQueryInfo->limit.offset, pQueryInfo->clauseLimit); /* * For project query with super table join, the numOfSub is equalled to the number of all subqueries. diff --git a/src/common/inc/dataformat.h b/src/common/inc/dataformat.h index cc13ab2eca3d2ee6281257626b69b490784569e5..aff239712bf1e19ae9d4933d956790e7ed2bec2c 100644 --- a/src/common/inc/dataformat.h +++ b/src/common/inc/dataformat.h @@ -21,7 +21,6 @@ #include "taosdef.h" - #ifdef __cplusplus extern "C" { #endif @@ -51,8 +50,8 @@ void tdSetCol(STColumn *pCol, int8_t type, int16_t colId, int32_t bytes); // ----------------- TSDB SCHEMA DEFINITION typedef struct { - int32_t numOfCols; - int32_t padding; // TODO: replace the padding for useful variable + int numOfCols; // Number of columns appended + int padding; // Total columns allocated STColumn columns[]; } STSchema; @@ -60,81 +59,55 @@ typedef struct { #define schemaColAt(s, i) ((s)->columns + i) STSchema *tdNewSchema(int32_t nCols); +int tdSchemaAppendCol(STSchema *pSchema, int8_t type, int16_t colId, int32_t bytes); STSchema *tdDupSchema(STSchema *pSchema); -void tdFreeSchema(STSchema *pSchema); -void tdUpdateSchema(STSchema *pSchema); +void tdFreeSchema(STSchema *pSchema); +void 
tdUpdateSchema(STSchema *pSchema); +int tdGetSchemaEncodeSize(STSchema *pSchema); +void * tdEncodeSchema(void *dst, STSchema *pSchema); +STSchema *tdDecodeSchema(void **psrc); // ----------------- Data row structure /* A data row, the format is like below: - * +---------+---------------------------------+ - * | int32_t | | - * +---------+---------------------------------+ - * | len | row | - * +---------+---------------------------------+ + * +----------+---------+---------------------------------+---------------------------------+ + * | int32_t | int32_t | | | + * +----------+---------+---------------------------------+---------------------------------+ + * | len | flen | First part | Second part | + * +----------+---------+---------------------------------+---------------------------------+ + * plen: first part length * len: the length including sizeof(row) + sizeof(len) * row: actual row data encoding */ typedef void *SDataRow; +#define TD_DATA_ROW_HEAD_SIZE (2 * sizeof(int32_t)) + #define dataRowLen(r) (*(int32_t *)(r)) -#define dataRowTuple(r) ((char *)(r) + sizeof(int32_t)) +#define dataRowFLen(r) (*(int32_t *)((char *)(r) + sizeof(int32_t))) +#define dataRowTuple(r) ((char *)(r) + TD_DATA_ROW_HEAD_SIZE) #define dataRowSetLen(r, l) (dataRowLen(r) = (l)) +#define dataRowSetFLen(r, l) (dataRowFLen(r) = (l)) #define dataRowIdx(r, i) ((char *)(r) + i) #define dataRowCpy(dst, r) memcpy((dst), (r), dataRowLen(r)) +#define dataRowAt(r, idx) ((char *)(r) + (idx)) -SDataRow tdNewDataRow(int32_t bytes); -// SDataRow tdNewDdataFromSchema(SSchema *pSchema); +void tdInitDataRow(SDataRow row, STSchema *pSchema); +int tdMaxRowBytesFromSchema(STSchema *pSchema); +SDataRow tdNewDataRow(int32_t bytes, STSchema *pSchema); +SDataRow tdNewDataRowFromSchema(STSchema *pSchema); void tdFreeDataRow(SDataRow row); -// int32_t tdAppendColVal(SDataRow row, void *value, SColumn *pCol, int32_t suffixOffset); -void tdDataRowCpy(void *dst, SDataRow row); -void tdDataRowReset(SDataRow row); 
+int tdAppendColVal(SDataRow row, void *value, STColumn *pCol); +void tdDataRowReset(SDataRow row, STSchema *pSchema); SDataRow tdDataRowDup(SDataRow row); -/* Data rows definition, the format of it is like below: - * +---------+-----------------------+--------+-----------------------+ - * | int32_t | | | | - * +---------+-----------------------+--------+-----------------------+ - * | len | SDataRow | .... | SDataRow | - * +---------+-----------------------+--------+-----------------------+ - */ -typedef void *SDataRows; - -#define TD_DATA_ROWS_HEAD_LEN sizeof(int32_t) - -#define dataRowsLen(rs) (*(int32_t *)(rs)) -#define dataRowsSetLen(rs, l) (dataRowsLen(rs) = (l)) -#define dataRowsInit(rs) dataRowsSetLen(rs, sizeof(int32_t)) +// ----------------- Data column structure +typedef struct SDataCol { + int64_t len; + char data[]; +} SDataCol; -void tdDataRowsAppendRow(SDataRows rows, SDataRow row); - -// Data rows iterator -typedef struct { - int32_t totalLen; - int32_t len; - SDataRow row; -} SDataRowsIter; - -void tdInitSDataRowsIter(SDataRows rows, SDataRowsIter *pIter); -SDataRow tdDataRowsNext(SDataRowsIter *pIter); - -/* Data column definition - * +---------+---------+-----------------------+ - * | int32_t | int32_t | | - * +---------+---------+-----------------------+ - * | len | npoints | data | - * +---------+---------+-----------------------+ - */ -typedef char *SDataCol; - -/* Data columns definition - * +---------+---------+-----------------------+--------+-----------------------+ - * | int32_t | int32_t | | | | - * +---------+---------+-----------------------+--------+-----------------------+ - * | len | npoints | SDataCol | .... 
| SDataCol | - * +---------+---------+-----------------------+--------+-----------------------+ - */ -typedef char *SDataCols; +void tdConvertDataRowToCol(SDataCol *cols, STSchema *pSchema, int *iter); #ifdef __cplusplus } diff --git a/src/common/inc/name.h b/src/common/inc/name.h new file mode 100644 index 0000000000000000000000000000000000000000..31a6d8fbeb15123aa88a6f68501715822019a632 --- /dev/null +++ b/src/common/inc/name.h @@ -0,0 +1,26 @@ +#ifndef TDENGINE_NAME_H +#define TDENGINE_NAME_H + +#include "os.h" +#include "taosmsg.h" + +typedef struct SDataStatis { + int16_t colId; + int64_t sum; + int64_t max; + int64_t min; + int16_t maxIndex; + int16_t minIndex; + int16_t numOfNull; +} SDataStatis; + +typedef struct SColumnInfoEx { + SColumnInfo info; + void* pData; // the corresponding block data in memory +} SColumnInfoEx; + +int32_t extractTableName(const char *tableId, char *name); + +char* extractDBName(const char *tableId, char *name); + +#endif // TDENGINE_NAME_H diff --git a/src/common/src/dataformat.c b/src/common/src/dataformat.c index 064cb3ff29404b43e6beed99696a5dac33abdd17..9c356b0cbc71671ee8a7d917bf18c0b988f0cb1f 100644 --- a/src/common/src/dataformat.c +++ b/src/common/src/dataformat.c @@ -14,6 +14,8 @@ */ #include "dataformat.h" +static int tdFLenFromSchema(STSchema *pSchema); + /** * Create a new STColumn object * ASSUMPTIONS: VALID PARAMETERS @@ -91,11 +93,37 @@ STSchema *tdNewSchema(int32_t nCols) { STSchema *pSchema = (STSchema *)calloc(1, size); if (pSchema == NULL) return NULL; - pSchema->numOfCols = nCols; + pSchema->numOfCols = 0; return pSchema; } +/** + * Append a column to the schema + */ +int tdSchemaAppendCol(STSchema *pSchema, int8_t type, int16_t colId, int32_t bytes) { + // if (pSchema->numOfCols >= pSchema->totalCols) return -1; + if (!isValidDataType(type, 0)) return -1; + + STColumn *pCol = schemaColAt(pSchema, schemaNCols(pSchema)); + colSetType(pCol, type); + colSetColId(pCol, colId); + colSetOffset(pCol, -1); + switch 
(type) { + case TSDB_DATA_TYPE_BINARY: + case TSDB_DATA_TYPE_NCHAR: + colSetBytes(pCol, bytes); + break; + default: + colSetBytes(pCol, TYPE_BYTES[type]); + break; + } + + pSchema->numOfCols++; + + return 0; +} + /** * Duplicate the schema and return a new object */ @@ -130,6 +158,61 @@ void tdUpdateSchema(STSchema *pSchema) { } } +/** + * Return the size of encoded schema + */ +int tdGetSchemaEncodeSize(STSchema *pSchema) { + return sizeof(STSchema) + schemaNCols(pSchema) * (T_MEMBER_SIZE(STColumn, type) + T_MEMBER_SIZE(STColumn, colId) + + T_MEMBER_SIZE(STColumn, bytes)); +} + +/** + * Encode a schema to dst, and return the next pointer + */ +void *tdEncodeSchema(void *dst, STSchema *pSchema) { + T_APPEND_MEMBER(dst, pSchema, STSchema, numOfCols); + for (int i = 0; i < schemaNCols(pSchema); i++) { + STColumn *pCol = schemaColAt(pSchema, i); + T_APPEND_MEMBER(dst, pCol, STColumn, type); + T_APPEND_MEMBER(dst, pCol, STColumn, colId); + T_APPEND_MEMBER(dst, pCol, STColumn, bytes); + } + + return dst; +} + +/** + * Decode a schema from a binary. + */ +STSchema *tdDecodeSchema(void **psrc) { + int numOfCols = 0; + + T_READ_MEMBER(*psrc, int, numOfCols); + + STSchema *pSchema = tdNewSchema(numOfCols); + if (pSchema == NULL) return NULL; + for (int i = 0; i < numOfCols; i++) { + int8_t type = 0; + int16_t colId = 0; + int32_t bytes = 0; + T_READ_MEMBER(*psrc, int8_t, type); + T_READ_MEMBER(*psrc, int16_t, colId); + T_READ_MEMBER(*psrc, int32_t, bytes); + + tdSchemaAppendCol(pSchema, type, colId, bytes); + } + + return pSchema; +} + +/** + * Initialize a data row + */ +void tdInitDataRow(SDataRow row, STSchema *pSchema) { + dataRowSetFLen(row, TD_DATA_ROW_HEAD_SIZE); + dataRowSetLen(row, TD_DATA_ROW_HEAD_SIZE + tdFLenFromSchema(pSchema)); +} + /** * Create a data row with maximum row length bytes. 
* @@ -140,21 +223,37 @@ void tdUpdateSchema(STSchema *pSchema) { * @return SDataRow object for success * NULL for failure */ -SDataRow tdNewDataRow(int32_t bytes) { +SDataRow tdNewDataRow(int32_t bytes, STSchema *pSchema) { int32_t size = sizeof(int32_t) + bytes; SDataRow row = malloc(size); if (row == NULL) return NULL; - dataRowSetLen(row, sizeof(int32_t)); + tdInitDataRow(row, pSchema); return row; } -// SDataRow tdNewDdataFromSchema(SSchema *pSchema) { -// int32_t bytes = tdMaxRowDataBytes(pSchema); -// return tdNewDataRow(bytes); -// } +/** + * Get maximum bytes a data row from a schema + * ASSUMPTIONS: VALID PARAMETER + */ +int tdMaxRowBytesFromSchema(STSchema *pSchema) { + // TODO + int bytes = TD_DATA_ROW_HEAD_SIZE; + for (int i = 0; i < schemaNCols(pSchema); i++) { + STColumn *pCol = schemaColAt(pSchema, i); + bytes += TYPE_BYTES[pCol->type]; + + if (pCol->type == TSDB_DATA_TYPE_BINARY || pCol->type == TSDB_DATA_TYPE_NCHAR) { + bytes += pCol->bytes; + } + } + + return bytes; +} + +SDataRow tdNewDataRowFromSchema(STSchema *pSchema) { return tdNewDataRow(tdMaxRowBytesFromSchema(pSchema), pSchema); } /** * Free the SDataRow object @@ -164,90 +263,56 @@ void tdFreeDataRow(SDataRow row) { } /** - * Append a column value to a SDataRow object. - * NOTE: THE APPLICATION SHOULD MAKE SURE VALID PARAMETERS. THE FUNCTION ASSUMES - * THE ROW OBJECT HAS ENOUGH SPACE TO HOLD THE VALUE. 
- * - * @param row the row to append value to - * @param value value pointer to append - * @param pSchema schema - * @param colIdx column index - * - * @return 0 for success and -1 for failure - */ -// int32_t tdAppendColVal(SDataRow row, void *value, SColumn *pCol, int32_t suffixOffset) { -// int32_t offset; - -// switch (pCol->type) { -// case TD_DATATYPE_BOOL: -// case TD_DATATYPE_TINYINT: -// case TD_DATATYPE_SMALLINT: -// case TD_DATATYPE_INT: -// case TD_DATATYPE_BIGINT: -// case TD_DATATYPE_FLOAT: -// case TD_DATATYPE_DOUBLE: -// case TD_DATATYPE_TIMESTAMP: -// memcpy(dataRowIdx(row, pCol->offset + sizeof(int32_t)), value, rowDataLen[pCol->type]); -// if (dataRowLen(row) < suffixOffset + sizeof(int32_t)) -// dataRowSetLen(row, dataRowLen(row) + rowDataLen[pCol->type]); -// break; -// case TD_DATATYPE_VARCHAR: -// offset = dataRowLen(row) > suffixOffset ? dataRowLen(row) : suffixOffset; -// memcpy(dataRowIdx(row, pCol->offset+sizeof(int32_t)), (void *)(&offset), sizeof(offset)); -// case TD_DATATYPE_NCHAR: -// case TD_DATATYPE_BINARY: -// break; -// default: -// return -1; -// } - -// return 0; -// } - -/** - * Copy a data row to a destination - * ASSUMPTIONS: dst has enough room for a copy of row - */ -void tdDataRowCpy(void *dst, SDataRow row) { memcpy(dst, row, dataRowLen(row)); } -void tdDataRowReset(SDataRow row) { dataRowSetLen(row, sizeof(int32_t)); } + * Append a column value to the data row + */ +int tdAppendColVal(SDataRow row, void *value, STColumn *pCol) { + switch (colType(pCol)) + { + case TSDB_DATA_TYPE_BINARY: + case TSDB_DATA_TYPE_NCHAR: + *(int32_t *)dataRowAt(row, dataRowFLen(row)) = dataRowLen(row); + dataRowFLen(row) += TYPE_BYTES[colType(pCol)]; + memcpy((void *)dataRowAt(row, dataRowLen(row)), value, strlen(value)); + dataRowLen(row) += strlen(value); + break; + default: + memcpy(dataRowAt(row, dataRowFLen(row)), value, TYPE_BYTES[colType(pCol)]); + dataRowFLen(row) += TYPE_BYTES[colType(pCol)]; + break; + } + + return 0; +} + +void 
tdDataRowReset(SDataRow row, STSchema *pSchema) { tdInitDataRow(row, pSchema); } + SDataRow tdDataRowDup(SDataRow row) { - SDataRow trow = tdNewDataRow(dataRowLen(row)); + SDataRow trow = malloc(dataRowLen(row)); if (trow == NULL) return NULL; dataRowCpy(trow, row); - return row; + return trow; } -void tdDataRowsAppendRow(SDataRows rows, SDataRow row) { - tdDataRowCpy((void *)((char *)rows + dataRowsLen(rows)), row); - dataRowsSetLen(rows, dataRowsLen(rows) + dataRowLen(row)); -} - -// Initialize the iterator -void tdInitSDataRowsIter(SDataRows rows, SDataRowsIter *pIter) { - if (pIter == NULL) return; - pIter->totalLen = dataRowsLen(rows); +void tdConvertDataRowToCol(SDataCol *cols, STSchema *pSchema, int *iter) { + int row = *iter; - if (pIter->totalLen == TD_DATA_ROWS_HEAD_LEN) { - pIter->row = NULL; - return; + for (int i = 0; i < schemaNCols(pSchema); i++) { + // TODO } - pIter->row = (SDataRow)((char *)rows + TD_DATA_ROWS_HEAD_LEN); - pIter->len = TD_DATA_ROWS_HEAD_LEN + dataRowLen(pIter->row); + *iter = row + 1; } -// Get the next row in Rows -SDataRow tdDataRowsNext(SDataRowsIter *pIter) { - SDataRow row = pIter->row; - if (row == NULL) return NULL; - - if (pIter->len >= pIter->totalLen) { - pIter->row = NULL; - } else { - pIter->row = (char *)row + dataRowLen(row); - pIter->len += dataRowLen(row); +/** + * Return the first part length of a data row for a schema + */ +static int tdFLenFromSchema(STSchema *pSchema) { + int ret = 0; + for (int i = 0; i < schemaNCols(pSchema); i++) { + STColumn *pCol = schemaColAt(pSchema, i); + ret += TYPE_BYTES[pCol->type]; } - return row; + return ret; } \ No newline at end of file diff --git a/src/common/src/name.c b/src/common/src/name.c new file mode 100644 index 0000000000000000000000000000000000000000..cf1eaa2bf11ce6ac6707e26d124feeaacb19fb20 --- /dev/null +++ b/src/common/src/name.c @@ -0,0 +1,42 @@ +#include "os.h" +#include "tutil.h" + +#include "name.h" +#include "tstoken.h" +#include "ttokendef.h" + +// todo 
refactor +static FORCE_INLINE const char* skipSegments(const char* input, char delim, int32_t num) { + for (int32_t i = 0; i < num; ++i) { + while (*input != 0 && *input++ != delim) { + }; + } + return input; +} + +static FORCE_INLINE size_t copy(char* dst, const char* src, char delimiter) { + size_t len = 0; + while (*src != delimiter && *src != 0) { + *dst++ = *src++; + len++; + } + + return len; +} + +int32_t extractTableName(const char* tableId, char* name) { + size_t offset = strcspn(tableId, &TS_PATH_DELIMITER[0]); + offset = strcspn(&tableId[offset], &TS_PATH_DELIMITER[0]); + + return strncpy(name, &tableId[offset], TSDB_TABLE_NAME_LEN); + +// char* r = skipSegments(tableId, TS_PATH_DELIMITER[0], 2); +// return copy(name, r, TS_PATH_DELIMITER[0]); +} + +char* extractDBName(const char* tableId, char* name) { + size_t offset1 = strcspn(tableId, &TS_PATH_DELIMITER[0]); + size_t len = strcspn(&tableId[offset1 + 1], &TS_PATH_DELIMITER[0]); + + return strncpy(name, &tableId[offset1 + 1], len); +} diff --git a/src/common/src/ttypes.c b/src/common/src/ttypes.c index 14b4d593fb9f2a614c804db3dcd8537b87aae6a7..2f4aa6ab765c71fa841ff5246e3e88ae37fd054b 100644 --- a/src/common/src/ttypes.c +++ b/src/common/src/ttypes.c @@ -26,9 +26,9 @@ const int32_t TYPE_BYTES[11] = { sizeof(int64_t), // TSDB_DATA_TYPE_BIGINT sizeof(float), // TSDB_DATA_TYPE_FLOAT sizeof(double), // TSDB_DATA_TYPE_DOUBLE - -1, // TSDB_DATA_TYPE_BINARY + sizeof(int32_t), // TSDB_DATA_TYPE_BINARY sizeof(TSKEY), // TSDB_DATA_TYPE_TIMESTAMP - -1 // TSDB_DATA_TYPE_NCHAR + sizeof(int32_t) // TSDB_DATA_TYPE_NCHAR }; tDataTypeDescriptor tDataTypeDesc[11] = { diff --git a/src/dnode/inc/dnodeMClient.h b/src/dnode/inc/dnodeMClient.h index 391e8da2c10382491bd089f98481977229b5b528..cab9ea9be4de577561b580e0376efcd721003370 100644 --- a/src/dnode/inc/dnodeMClient.h +++ b/src/dnode/inc/dnodeMClient.h @@ -20,8 +20,10 @@ extern "C" { #endif -int32_t dnodeInitMClient(); -void dnodeCleanupMClient(); +int32_t 
dnodeInitMClient(); +void dnodeCleanupMClient(); +void dnodeSendMsgToMnode(SRpcMsg *rpcMsg); +uint32_t dnodeGetMnodeMasteIp(); #ifdef __cplusplus } diff --git a/src/dnode/inc/dnodeMgmt.h b/src/dnode/inc/dnodeMgmt.h index bc0ff164a21b735a49e352430eb7d5634ab69b8f..f944bd5adda6a2e9532036b1e4ad6788ec792b1d 100644 --- a/src/dnode/inc/dnodeMgmt.h +++ b/src/dnode/inc/dnodeMgmt.h @@ -22,7 +22,8 @@ extern "C" { int32_t dnodeInitMgmt(); void dnodeCleanupMgmt(); -void dnodeMgmt(void *rpcMsg); +void dnodeMgmt(SRpcMsg *rpcMsg); +void dnodeUpdateDnodeId(int32_t dnodeId); void* dnodeGetVnode(int32_t vgId); int32_t dnodeGetVnodeStatus(void *pVnode); diff --git a/src/dnode/inc/dnodeModule.h b/src/dnode/inc/dnodeModule.h index 1ad97034aa84ec2ce1af8a4e61c54ad46031636d..728630748f8b195721cd554cf590af900c49a1e8 100644 --- a/src/dnode/inc/dnodeModule.h +++ b/src/dnode/inc/dnodeModule.h @@ -20,9 +20,10 @@ extern "C" { #endif -void dnodeAllocModules(); int32_t dnodeInitModules(); -void dnodeCleanUpModules(); +void dnodeCleanUpModules(); +void dnodeStartModules(); +void dnodeProcessModuleStatus(uint32_t moduleStatus); #ifdef __cplusplus } diff --git a/src/dnode/inc/dnodeRead.h b/src/dnode/inc/dnodeRead.h index e4f32b0f8cb40c6a48a93bd003c74eb5e1e0e394..fba3245a0779c43f5617ceeebfb0846460c65981 100644 --- a/src/dnode/inc/dnodeRead.h +++ b/src/dnode/inc/dnodeRead.h @@ -22,7 +22,7 @@ extern "C" { int32_t dnodeInitRead(); void dnodeCleanupRead(); -void dnodeRead(void *pMsg); +void dnodeRead(SRpcMsg *pMsg); void * dnodeAllocateReadWorker(); void dnodeFreeReadWorker(void *rqueue); diff --git a/src/dnode/inc/dnodeSystem.h b/src/dnode/inc/dnodeSystem.h deleted file mode 100644 index 9c56b8db5d31c71afa454791aaf468190e837243..0000000000000000000000000000000000000000 --- a/src/dnode/inc/dnodeSystem.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_DNODE_SYSTEM_H -#define TDENGINE_DNODE_SYSTEM_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum { - TSDB_DNODE_RUN_STATUS_INITIALIZE, - TSDB_DNODE_RUN_STATUS_RUNING, - TSDB_DNODE_RUN_STATUS_STOPPED -} SDnodeRunStatus; - -int32_t dnodeInitSystem(); -void dnodeCleanUpSystem(); -SDnodeRunStatus dnodeGetRunStatus(); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/dnode/inc/dnodeWrite.h b/src/dnode/inc/dnodeWrite.h index f5904555fbdfee8ea7e3b75365e66da7f6f4cdaa..3e37141f9464c822913c41146ec57e9a6e2ef811 100644 --- a/src/dnode/inc/dnodeWrite.h +++ b/src/dnode/inc/dnodeWrite.h @@ -22,8 +22,8 @@ extern "C" { int32_t dnodeInitWrite(); void dnodeCleanupWrite(); -void dnodeWrite(void *pMsg); -void * dnodeAllocateWriteWorker(void *pVnode); +void dnodeWrite(SRpcMsg *pMsg); +void * dnodeAllocateWriteWorker(); void dnodeFreeWriteWorker(void *worker); #ifdef __cplusplus diff --git a/src/dnode/src/dnodeMClient.c b/src/dnode/src/dnodeMClient.c index 3f70caf18e677f1285bfd7f12fa655d5c9e2a380..d1e27a3ef17b7c57dd922e50773f9cee902dc371 100644 --- a/src/dnode/src/dnodeMClient.c +++ b/src/dnode/src/dnodeMClient.c @@ -18,34 +18,56 @@ #include "taosmsg.h" #include "tlog.h" #include "trpc.h" -#include "dnodeSystem.h" +#include "tutil.h" +#include "dnode.h" +#include "dnodeMClient.h" +#include "dnodeModule.h" +#include "dnodeMgmt.h" -static void (*dnodeProcessMgmtRspFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *); +static bool 
dnodeReadMnodeIpList(); +static void dnodeSaveMnodeIpList(); static void dnodeProcessRspFromMnode(SRpcMsg *pMsg); static void dnodeProcessStatusRsp(SRpcMsg *pMsg); -static void *tsDnodeMClientRpc; +static void (*tsDnodeProcessMgmtRspFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *); +static void *tsDnodeMClientRpc = NULL; +static SRpcIpSet tsDnodeMnodeIpList = {0}; int32_t dnodeInitMClient() { - dnodeProcessMgmtRspFp[TSDB_MSG_TYPE_STATUS_RSP] = dnodeProcessStatusRsp; + if (!dnodeReadMnodeIpList()) { + dTrace("failed to read mnode iplist, set it from cfg file"); + memset(&tsDnodeMnodeIpList, 0, sizeof(SRpcIpSet)); + tsDnodeMnodeIpList.port = tsMnodeDnodePort; + tsDnodeMnodeIpList.numOfIps = 1; + tsDnodeMnodeIpList.ip[0] = inet_addr(tsMasterIp); + if (tsSecondIp[0]) { + tsDnodeMnodeIpList.numOfIps = 2; + tsDnodeMnodeIpList.ip[1] = inet_addr(tsSecondIp); + } + } + tsDnodeProcessMgmtRspFp[TSDB_MSG_TYPE_DM_STATUS_RSP] = dnodeProcessStatusRsp; + SRpcInit rpcInit; memset(&rpcInit, 0, sizeof(rpcInit)); rpcInit.localIp = tsAnyIp ? 
"0.0.0.0" : tsPrivateIp; rpcInit.localPort = 0; rpcInit.label = "DND-MC"; rpcInit.numOfThreads = 1; - rpcInit.cfp = dnodeProcessRspFromMnode; - rpcInit.sessions = TSDB_SESSIONS_PER_DNODE; + rpcInit.cfp = dnodeProcessRspFromMnode; + rpcInit.sessions = 100; rpcInit.connType = TAOS_CONN_CLIENT; - rpcInit.idleTime = tsShellActivityTimer * 1000; + rpcInit.idleTime = tsShellActivityTimer * 2000; + rpcInit.user = "t"; + rpcInit.ckey = "key"; + rpcInit.secret = "secret"; tsDnodeMClientRpc = rpcOpen(&rpcInit); if (tsDnodeMClientRpc == NULL) { - dError("failed to init connection from mgmt"); + dError("failed to init mnode rpc client"); return -1; } - dPrint("client connection to mgmt is opened"); + dPrint("mnode rpc client is opened"); return 0; } @@ -53,21 +75,122 @@ void dnodeCleanupMClient() { if (tsDnodeMClientRpc) { rpcClose(tsDnodeMClientRpc); tsDnodeMClientRpc = NULL; + dPrint("mnode rpc client is closed"); } } static void dnodeProcessRspFromMnode(SRpcMsg *pMsg) { - - if (dnodeProcessMgmtRspFp[pMsg->msgType]) { - (*dnodeProcessMgmtRspFp[pMsg->msgType])(pMsg); + if (tsDnodeProcessMgmtRspFp[pMsg->msgType]) { + (*tsDnodeProcessMgmtRspFp[pMsg->msgType])(pMsg); } else { - dError("%s is not processed", taosMsg[pMsg->msgType]); + dError("%s is not processed in mnode rpc client", taosMsg[pMsg->msgType]); } rpcFreeCont(pMsg->pCont); } static void dnodeProcessStatusRsp(SRpcMsg *pMsg) { + if (pMsg->code != TSDB_CODE_SUCCESS) { + dError("status rsp is received, error:%s", tstrerror(pMsg->code)); + return; + } + + SDMStatusRsp *pStatusRsp = pMsg->pCont; + if (pStatusRsp->ipList.numOfIps <= 0) { + dError("status msg is invalid, num of ips is %d", pStatusRsp->ipList.numOfIps); + return; + } + + pStatusRsp->ipList.port = htons(pStatusRsp->ipList.port); + for (int32_t i = 0; i < pStatusRsp->ipList.numOfIps; ++i) { + pStatusRsp->ipList.ip[i] = htonl(pStatusRsp->ipList.ip[i]); + } + + //dTrace("status msg is received, result:%s", tstrerror(pMsg->code)); + + if 
(memcmp(&(pStatusRsp->ipList), &tsDnodeMnodeIpList, sizeof(SRpcIpSet)) != 0) { + dPrint("mnode ip list is changed, numOfIps:%d inUse:%d", pStatusRsp->ipList.numOfIps, pStatusRsp->ipList.inUse); + memcpy(&tsDnodeMnodeIpList, &pStatusRsp->ipList, sizeof(SRpcIpSet)); + for (int32_t i = 0; i < tsDnodeMnodeIpList.numOfIps; ++i) { + dPrint("mnode index:%d ip:%s", i, taosIpStr(tsDnodeMnodeIpList.ip[i])); + } + dnodeSaveMnodeIpList(); + } + + SDnodeState *pState = &pStatusRsp->dnodeState; + pState->numOfVnodes = htonl(pState->numOfVnodes); + pState->moduleStatus = htonl(pState->moduleStatus); + pState->createdTime = htonl(pState->createdTime); + pState->dnodeId = htonl(pState->dnodeId); + + dnodeProcessModuleStatus(pState->moduleStatus); + dnodeUpdateDnodeId(pState->dnodeId); +} + +void dnodeSendMsgToMnode(SRpcMsg *rpcMsg) { + rpcSendRequest(tsDnodeMClientRpc, &tsDnodeMnodeIpList, rpcMsg); +} + +static bool dnodeReadMnodeIpList() { + char ipFile[TSDB_FILENAME_LEN] = {0}; + sprintf(ipFile, "%s/iplist", tsDnodeDir); + + FILE *fp = fopen(ipFile, "r"); + if (!fp) return false; + + char option[32] = {0}; + int32_t value = 0; + int32_t num = 0; + + num = fscanf(fp, "%s %d", option, &value); + if (num != 2) return false; + if (strcmp(option, "inUse") != 0) return false; + tsDnodeMnodeIpList.inUse = (int8_t)value;; + + num = fscanf(fp, "%s %d", option, &value); + if (num != 2) return false; + if (strcmp(option, "numOfIps") != 0) return false; + tsDnodeMnodeIpList.numOfIps = (int8_t)value; + + num = fscanf(fp, "%s %d", option, &value); + if (num != 2) return false; + if (strcmp(option, "port") != 0) return false; + tsDnodeMnodeIpList.port = (uint16_t)value; + + for (int32_t i = 0; i < tsDnodeMnodeIpList.numOfIps; i++) { + num = fscanf(fp, "%s %d", option, &value); + if (num != 2) return false; + if (strncmp(option, "ip", 2) != 0) return false; + tsDnodeMnodeIpList.ip[i] = (uint32_t)value; + } + fclose(fp); + dPrint("read mnode iplist successed"); + for (int32_t i = 0; i < 
tsDnodeMnodeIpList.numOfIps; i++) { + dPrint("mnode index:%d ip:%s", i, taosIpStr(tsDnodeMnodeIpList.ip[i])); + } + return true; } + +static void dnodeSaveMnodeIpList() { + char ipFile[TSDB_FILENAME_LEN] = {0}; + sprintf(ipFile, "%s/iplist", tsDnodeDir); + + FILE *fp = fopen(ipFile, "w"); + if (!fp) return; + + fprintf(fp, "inUse %d\n", tsDnodeMnodeIpList.inUse); + fprintf(fp, "numOfIps %d\n", tsDnodeMnodeIpList.numOfIps); + fprintf(fp, "port %u\n", tsDnodeMnodeIpList.port); + for (int32_t i = 0; i < tsDnodeMnodeIpList.numOfIps; i++) { + fprintf(fp, "ip%d %u\n", i, tsDnodeMnodeIpList.ip[i]); + } + + fclose(fp); + dPrint("save mnode iplist successed"); +} + +uint32_t dnodeGetMnodeMasteIp() { + return tsDnodeMnodeIpList.ip[0]; +} \ No newline at end of file diff --git a/src/dnode/src/dnodeMain.c b/src/dnode/src/dnodeMain.c new file mode 100644 index 0000000000000000000000000000000000000000..af10fccd010202a9c49e7ef3247cc93a6406e01d --- /dev/null +++ b/src/dnode/src/dnodeMain.c @@ -0,0 +1,258 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#define _DEFAULT_SOURCE +#include "os.h" +#include "tglobalcfg.h" +#include "tlog.h" +#include "tmodule.h" +#include "trpc.h" +#include "tutil.h" +#include "dnode.h" +#include "dnodeMClient.h" +#include "dnodeMgmt.h" +#include "dnodeMnode.h" +#include "dnodeModule.h" +#include "dnodeRead.h" +#include "dnodeShell.h" +#include "dnodeWrite.h" +#ifdef CLUSTER +#include "account.h" +#include "admin.h" +#include "balance.h" +#include "cluster.h" +#include "grant.h" +#include "mpeer.h" +#include "storage.h" +#include "vpeer.h" +#endif + +static int32_t dnodeInitSystem(); +static int32_t dnodeInitStorage(); +static void dnodeInitPlugins(); +static void dnodeCleanupStorage(); +static void dnodeCleanUpSystem(); +static void dnodeSetRunStatus(SDnodeRunStatus status); +static void signal_handler(int32_t signum, siginfo_t *sigInfo, void *context); +static void dnodeCheckDataDirOpenned(char *dir); +static SDnodeRunStatus tsDnodeRunStatus = TSDB_DNODE_RUN_STATUS_STOPPED; +void (*dnodeParseParameterKFp)() = NULL; + +int32_t main(int32_t argc, char *argv[]) { + dnodeInitPlugins(); + + // Set global configuration file + for (int32_t i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-c") == 0) { + if (i < argc - 1) { + strcpy(configDir, argv[++i]); + } else { + printf("'-c' requires a parameter, default:%s\n", configDir); + exit(EXIT_FAILURE); + } + } else if (strcmp(argv[i], "-V") == 0) { + char *versionStr = tsIsCluster ? 
"enterprise" : "community"; + printf("%s version: %s compatible_version: %s\n", versionStr, version, compatible_version); + printf("gitinfo: %s\n", gitinfo); + printf("gitinfoI: %s\n", gitinfoOfInternal); + printf("buildinfo: %s\n", buildinfo); + exit(EXIT_SUCCESS); + } else if (strcmp(argv[i], "-k") == 0) { + if (dnodeParseParameterKFp) { + dnodeParseParameterKFp(); + exit(EXIT_SUCCESS); + } +#ifdef TAOS_MEM_CHECK + } else if (strcmp(argv[i], "--alloc-random-fail") == 0) { + if ((i < argc - 1) && (argv[i+1][0] != '-')) { + taosSetAllocMode(TAOS_ALLOC_MODE_RANDOM_FAIL, argv[++i], true); + } else { + taosSetAllocMode(TAOS_ALLOC_MODE_RANDOM_FAIL, NULL, true); + } + } else if (strcmp(argv[i], "--detect-mem-leak") == 0) { + if ((i < argc - 1) && (argv[i+1][0] != '-')) { + taosSetAllocMode(TAOS_ALLOC_MODE_DETECT_LEAK, argv[++i], true); + } else { + taosSetAllocMode(TAOS_ALLOC_MODE_DETECT_LEAK, NULL, true); + } +#endif + } + } + + /* Set termination handler. */ + struct sigaction act; + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = signal_handler; + sigaction(SIGTERM, &act, NULL); + sigaction(SIGHUP, &act, NULL); + sigaction(SIGINT, &act, NULL); + sigaction(SIGUSR1, &act, NULL); + sigaction(SIGUSR2, &act, NULL); + + // Open /var/log/syslog file to record information. 
+ openlog("TDengine:", LOG_PID | LOG_CONS | LOG_NDELAY, LOG_LOCAL1); + syslog(LOG_INFO, "Starting TDengine service..."); + + // Initialize the system + if (dnodeInitSystem() < 0) { + syslog(LOG_ERR, "Error initialize TDengine system"); + closelog(); + + dnodeCleanUpSystem(); + exit(EXIT_FAILURE); + } + + syslog(LOG_INFO, "Started TDengine service successfully."); + + while (1) { + sleep(1000); + } +} + +static void signal_handler(int32_t signum, siginfo_t *sigInfo, void *context) { + if (signum == SIGUSR1) { + tsCfgDynamicOptions("debugFlag 135"); + return; + } + if (signum == SIGUSR2) { + tsCfgDynamicOptions("resetlog"); + return; + } + syslog(LOG_INFO, "Shut down signal is %d", signum); + syslog(LOG_INFO, "Shutting down TDengine service..."); + // clean the system. + dPrint("shut down signal is %d, sender PID:%d", signum, sigInfo->si_pid); + dnodeCleanUpSystem(); + // close the syslog + syslog(LOG_INFO, "Shut down TDengine service successfully"); + dPrint("TDengine is shut down!"); + closelog(); + exit(EXIT_SUCCESS); +} + +static int32_t dnodeInitSystem() { + dnodeSetRunStatus(TSDB_DNODE_RUN_STATUS_INITIALIZE); + tscEmbedded = 1; + taosResolveCRC(); + tsReadGlobalLogConfig(); + taosSetCoreDump(); + signal(SIGPIPE, SIG_IGN); + + struct stat dirstat; + if (stat(logDir, &dirstat) < 0) { + mkdir(logDir, 0755); + } + + char temp[TSDB_FILENAME_LEN]; + sprintf(temp, "%s/taosdlog", logDir); + if (taosInitLog(temp, tsNumOfLogLines, 1) < 0) { + printf("failed to init log file\n"); + } + + if (!tsReadGlobalConfig()) { + tsPrintGlobalConfig(); + dError("TDengine read global config failed"); + return -1; + } + tsPrintGlobalConfig(); + + dPrint("Server IP address is:%s", tsPrivateIp); + dPrint("starting to initialize TDengine ..."); + + if (dnodeInitStorage() != 0) return -1; + if (dnodeInitModules() != 0) return -1; + if (dnodeInitRead() != 0) return -1; + if (dnodeInitWrite() != 0) return -1; + if (dnodeInitMClient() != 0) return -1; + if (dnodeInitMnode() != 0) return -1; + 
if (dnodeInitMgmt() != 0) return -1; + if (dnodeInitShell() != 0) return -1; + + dnodeStartModules(); + dnodeSetRunStatus(TSDB_DNODE_RUN_STATUS_RUNING); + + dPrint("TDengine is initialized successfully"); + + return 0; +} + +static void dnodeCleanUpSystem() { + if (dnodeGetRunStatus() != TSDB_DNODE_RUN_STATUS_STOPPED) { + tclearModuleStatus(TSDB_MOD_MGMT); + dnodeSetRunStatus(TSDB_DNODE_RUN_STATUS_STOPPED); + dnodeCleanupShell(); + dnodeCleanupMClient(); + dnodeCleanupMnode(); + dnodeCleanupMgmt(); + dnodeCleanupWrite(); + dnodeCleanupRead(); + dnodeCleanUpModules(); + dnodeCleanupStorage(); + taosCloseLogger(); + } +} + +SDnodeRunStatus dnodeGetRunStatus() { + return tsDnodeRunStatus; +} + +static void dnodeSetRunStatus(SDnodeRunStatus status) { + tsDnodeRunStatus = status; +} + +static void dnodeCheckDataDirOpenned(char *dir) { + char filepath[256] = {0}; + sprintf(filepath, "%s/.running", dir); + + int32_t fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); + int32_t ret = flock(fd, LOCK_EX | LOCK_NB); + if (ret != 0) { + dError("failed to lock file:%s ret:%d, database may be running, quit", filepath, ret); + exit(0); + } +} + +static int32_t dnodeInitStorage() { + struct stat dirstat; + if (stat(dataDir, &dirstat) < 0) { + mkdir(dataDir, 0755); + } + + sprintf(tsMnodeDir, "%s/mnode", dataDir); + sprintf(tsVnodeDir, "%s/vnode", dataDir); + sprintf(tsDnodeDir, "%s/dnode", dataDir); + mkdir(tsMnodeDir, 0755); + mkdir(tsVnodeDir, 0755); + mkdir(tsDnodeDir, 0755); + + dnodeCheckDataDirOpenned(tsDnodeDir); + + dPrint("storage directory is initialized"); + return 0; +} + +static void dnodeCleanupStorage() {} + +static void dnodeInitPlugins() { +#ifdef CLUSTER +// acctInit(); +// adminInit(); +// balanceInit(); +// clusterInit(); +// grantInit(); +// mpeerInit(); +// storageInit(); +#endif +} diff --git a/src/dnode/src/dnodeMgmt.c b/src/dnode/src/dnodeMgmt.c index 
a4cb75241325817f8cb99371dcd2c45b8e408ce2..f6976aa2cc5f2b40471bb013a6241db4317651d8 100644 --- a/src/dnode/src/dnodeMgmt.c +++ b/src/dnode/src/dnodeMgmt.c @@ -22,14 +22,18 @@ #include "trpc.h" #include "tstatus.h" #include "tsdb.h" +#include "ttime.h" +#include "ttimer.h" +#include "dnodeMClient.h" #include "dnodeMgmt.h" #include "dnodeRead.h" #include "dnodeWrite.h" typedef struct { int32_t vgId; // global vnode group ID - int32_t status; // status: master, slave, notready, deleting int32_t refCount; // reference count + int8_t dirty; + int8_t status; // status: master, slave, notready, deleting int64_t version; void *wworker; void *rworker; @@ -42,55 +46,90 @@ typedef struct { static int32_t dnodeOpenVnodes(); static void dnodeCleanupVnodes(); -static int32_t dnodeOpenVnode(int32_t vgId); +static int32_t dnodeOpenVnode(int32_t vnode, char *rootDir); static void dnodeCleanupVnode(SVnodeObj *pVnode); static int32_t dnodeCreateVnode(SMDCreateVnodeMsg *cfg); static void dnodeDropVnode(SVnodeObj *pVnode); -static void dnodeProcesSMDCreateVnodeMsg(SRpcMsg *pMsg); -static void dnodeProcesSMDDropVnodeMsg(SRpcMsg *pMsg); +static void dnodeDoDropVnode(SVnodeObj *pVnode); +static void dnodeProcessCreateVnodeMsg(SRpcMsg *pMsg); +static void dnodeProcessDropVnodeMsg(SRpcMsg *pMsg); static void dnodeProcessAlterVnodeMsg(SRpcMsg *pMsg); +static void dnodeProcessAlterStreamMsg(SRpcMsg *pMsg); +static void dnodeProcessConfigDnodeMsg(SRpcMsg *pMsg); static void (*dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *pMsg); +static void dnodeSendStatusMsg(void *handle, void *tmrId); +static void dnodeReadDnodeId(); -static void * tsDnodeVnodesHash = NULL; +static void *tsDnodeVnodesHash = NULL; +static void *tsDnodeTmr = NULL; +static void *tsStatusTimer = NULL; +static uint32_t tsRebootTime; +static int32_t tsDnodeId = 0; +static char tsDnodeName[TSDB_DNODE_NAME_LEN]; int32_t dnodeInitMgmt() { - dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = dnodeProcesSMDCreateVnodeMsg; - 
dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = dnodeProcesSMDDropVnodeMsg; + dnodeReadDnodeId(); + + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = dnodeProcessCreateVnodeMsg; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = dnodeProcessDropVnodeMsg; dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = dnodeProcessAlterVnodeMsg; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = dnodeProcessAlterStreamMsg; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CONFIG_DNODE] = dnodeProcessConfigDnodeMsg; tsDnodeVnodesHash = taosInitIntHash(TSDB_MAX_VNODES, sizeof(SVnodeObj), taosHashInt); if (tsDnodeVnodesHash == NULL) { - return TSDB_CODE_SERV_OUT_OF_MEMORY; + dError("failed to init vnode list"); + return -1; } + tsRebootTime = taosGetTimestampSec(); + + tsDnodeTmr = taosTmrInit(100, 200, 60000, "DND-DM"); + if (tsDnodeTmr == NULL) { + dError("failed to init dnode timer"); + return -1; + } + taosTmrReset(dnodeSendStatusMsg, 500, NULL, tsDnodeTmr, &tsStatusTimer); + return dnodeOpenVnodes(); } void dnodeCleanupMgmt() { + if (tsStatusTimer != NULL) { + taosTmrStopA(&tsStatusTimer); + tsStatusTimer = NULL; + } + + if (tsDnodeTmr != NULL) { + taosTmrCleanUp(tsDnodeTmr); + tsDnodeTmr = NULL; + } + dnodeCleanupVnodes(); - taosCleanUpIntHash(tsDnodeVnodesHash); + if (tsDnodeVnodesHash == NULL) { + taosCleanUpIntHash(tsDnodeVnodesHash); + tsDnodeVnodesHash = NULL; + } } -void dnodeMgmt(void *rpcMsg) { - SRpcMsg *pMsg = rpcMsg; +void dnodeMgmt(SRpcMsg *pMsg) { terrno = 0; if (dnodeProcessMgmtMsgFp[pMsg->msgType]) { (*dnodeProcessMgmtMsgFp[pMsg->msgType])(pMsg); } else { - terrno = TSDB_CODE_MSG_NOT_PROCESSED; + SRpcMsg rsp; + rsp.handle = pMsg->handle; + rsp.code = TSDB_CODE_MSG_NOT_PROCESSED; + rsp.pCont = NULL; + rpcSendResponse(&rsp); } - SRpcMsg rsp; - rsp.handle = pMsg->handle; - rsp.code = terrno; - rsp.pCont = NULL; - rpcSendResponse(&rsp); - rpcFreeCont(pMsg->pCont); // free the received message + rpcFreeCont(pMsg->pCont); } void *dnodeGetVnode(int32_t 
vgId) { - SVnodeObj *pVnode = taosGetIntHashData(tsDnodeVnodesHash, vgId); + SVnodeObj *pVnode = (SVnodeObj *) taosGetIntHashData(tsDnodeVnodesHash, vgId); if (pVnode == NULL) { terrno = TSDB_CODE_INVALID_VGROUP_ID; return NULL; @@ -125,30 +164,64 @@ void *dnodeGetVnodeTsdb(void *pVnode) { return ((SVnodeObj *)pVnode)->tsdb; } -void dnodeReleaseVnode(void *pVnode) { - atomic_sub_fetch_32(&((SVnodeObj *) pVnode)->refCount, 1); +void dnodeReleaseVnode(void *pVnodeRaw) { + SVnodeObj *pVnode = pVnodeRaw; + int32_t count = atomic_sub_fetch_32(&pVnode->refCount, 1); + + if (count == 0 && pVnode->dirty) { + dnodeDoDropVnode(pVnode); + } } static int32_t dnodeOpenVnodes() { - dPrint("open all vnodes"); + DIR *dir = opendir(tsVnodeDir); + if (dir == NULL) { + return TSDB_CODE_NO_WRITE_ACCESS; + } + + int32_t numOfVnodes = 0; + struct dirent *de = NULL; + while ((de = readdir(dir)) != NULL) { + if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0) continue; + if (de->d_type & DT_DIR) { + if (strncmp("vnode", de->d_name, 5) != 0) continue; + int32_t vnode = atoi(de->d_name + 5); + if (vnode == 0) continue; + + char vnodeDir[TSDB_FILENAME_LEN * 3]; + snprintf(vnodeDir, TSDB_FILENAME_LEN * 3, "%s/%s", tsVnodeDir, de->d_name); + int32_t code = dnodeOpenVnode(vnode, vnodeDir); + if (code == 0) { + numOfVnodes++; + } + } + } + closedir(dir); + + dPrint("dnode mgmt is opened, vnodes:%d", numOfVnodes); return TSDB_CODE_SUCCESS; } +typedef void (*CleanupFp)(char *); static void dnodeCleanupVnodes() { - dPrint("clean all vnodes"); + int32_t num = taosGetIntHashSize(tsDnodeVnodesHash); + taosCleanUpIntHashWithFp(tsDnodeVnodesHash, (CleanupFp)dnodeCleanupVnode); + dPrint("dnode mgmt is closed, vnodes:%d", num); } -static int32_t dnodeOpenVnode(int32_t vgId) { - char rootDir[TSDB_FILENAME_LEN] = {0}; - sprintf(rootDir, "%s/vnode%d", tsDirectory, vgId); - - void *pTsdb = tsdbOpenRepo(rootDir); - if (pTsdb != NULL) { +static int32_t dnodeOpenVnode(int32_t vnode, char *rootDir) 
{ + char tsdbDir[TSDB_FILENAME_LEN]; + sprintf(tsdbDir, "%s/tsdb", rootDir); + void *pTsdb = tsdbOpenRepo(tsdbDir); + if (pTsdb == NULL) { + dError("failed to open tsdb in vnode:%d %s, reason:%s", vnode, tsdbDir, tstrerror(terrno)); return terrno; } - SVnodeObj vnodeObj; - vnodeObj.vgId = vgId; + //STsdbRepoInfo *tsdbInfo = tsdbGetStatus(pTsdb); + + SVnodeObj vnodeObj = {0}; + vnodeObj.vgId = vnode;//tsdbInfo->tsdbCfg.tsdbId; vnodeObj.status = TSDB_VN_STATUS_NOT_READY; vnodeObj.refCount = 1; vnodeObj.version = 0; @@ -160,8 +233,9 @@ static int32_t dnodeOpenVnode(int32_t vgId) { vnodeObj.events = NULL; vnodeObj.cq = NULL; - taosAddIntHash(tsDnodeVnodesHash, vnodeObj.vgId, &vnodeObj); + taosAddIntHash(tsDnodeVnodesHash, vnodeObj.vgId, (char *) (&vnodeObj)); + dTrace("open vnode:%d in %s", vnodeObj.vgId, rootDir); return TSDB_CODE_SUCCESS; } @@ -190,13 +264,13 @@ static void dnodeCleanupVnode(SVnodeObj *pVnode) { pVnode->tsdb = NULL; } - taosDeleteIntHash(tsDnodeVnodesHash, pVnode->vgId); + dTrace("cleanup vnode:%d", pVnode->vgId); } static int32_t dnodeCreateVnode(SMDCreateVnodeMsg *pVnodeCfg) { - STsdbCfg tsdbCfg; + STsdbCfg tsdbCfg = {0}; tsdbCfg.precision = pVnodeCfg->cfg.precision; - tsdbCfg.tsdbId = pVnodeCfg->vnode; + tsdbCfg.tsdbId = pVnodeCfg->cfg.vgId; tsdbCfg.maxTables = pVnodeCfg->cfg.maxSessions; tsdbCfg.daysPerFile = pVnodeCfg->cfg.daysPerFile; tsdbCfg.minRowsPerFileBlock = -1; @@ -205,14 +279,37 @@ static int32_t dnodeCreateVnode(SMDCreateVnodeMsg *pVnodeCfg) { tsdbCfg.maxCacheSize = -1; char rootDir[TSDB_FILENAME_LEN] = {0}; - sprintf(rootDir, "%s/vnode%d", tsDirectory, pVnodeCfg->cfg.vgId); + sprintf(rootDir, "%s/vnode%d", tsVnodeDir, pVnodeCfg->cfg.vgId); + if (mkdir(rootDir, 0755) != 0) { + if (errno == EACCES) { + return TSDB_CODE_NO_DISK_PERMISSIONS; + } else if (errno == ENOSPC) { + return TSDB_CODE_SERV_NO_DISKSPACE; + } else if (errno == EEXIST) { + } else { + return TSDB_CODE_VG_INIT_FAILED; + } + } + + sprintf(rootDir, "%s/vnode%d/tsdb", 
tsVnodeDir, pVnodeCfg->cfg.vgId); + if (mkdir(rootDir, 0755) != 0) { + if (errno == EACCES) { + return TSDB_CODE_NO_DISK_PERMISSIONS; + } else if (errno == ENOSPC) { + return TSDB_CODE_SERV_NO_DISKSPACE; + } else if (errno == EEXIST) { + } else { + return TSDB_CODE_VG_INIT_FAILED; + } + } void *pTsdb = tsdbCreateRepo(rootDir, &tsdbCfg, NULL); - if (pTsdb != NULL) { + if (pTsdb == NULL) { + dError("vgroup:%d, failed to create tsdb in vnode, reason:%s", pVnodeCfg->cfg.vgId, tstrerror(terrno)); return terrno; } - SVnodeObj vnodeObj; + SVnodeObj vnodeObj = {0}; vnodeObj.vgId = pVnodeCfg->cfg.vgId; vnodeObj.status = TSDB_VN_STATUS_NOT_READY; vnodeObj.refCount = 1; @@ -225,54 +322,65 @@ static int32_t dnodeCreateVnode(SMDCreateVnodeMsg *pVnodeCfg) { vnodeObj.events = NULL; vnodeObj.cq = NULL; - taosAddIntHash(tsDnodeVnodesHash, vnodeObj.vgId, &vnodeObj); + taosAddIntHash(tsDnodeVnodesHash, vnodeObj.vgId, (char *) (&vnodeObj)); + dPrint("vgroup:%d, vnode:%d is created", vnodeObj.vgId, vnodeObj.vgId); return TSDB_CODE_SUCCESS; } +static void dnodeDoDropVnode(SVnodeObj *pVnode) { + if (pVnode->tsdb) { + tsdbDropRepo(pVnode->tsdb); + pVnode->tsdb = NULL; + } + + dnodeCleanupVnode(pVnode); + taosDeleteIntHash(tsDnodeVnodesHash, pVnode->vgId); +} + static void dnodeDropVnode(SVnodeObj *pVnode) { pVnode->status = TSDB_VN_STATUS_NOT_READY; + pVnode->dirty = true; int32_t count = atomic_sub_fetch_32(&pVnode->refCount, 1); if (count > 0) { - // wait refcount - } - - if (pVnode->tsdb) { - tsdbDropRepo(pVnode->tsdb); - pVnode->tsdb = NULL; + dTrace("vgroup:%d, vnode will be dropped until refcount:%d is 0", pVnode->vgId, count); + return; } - dnodeCleanupVnode(pVnode); + dnodeDoDropVnode(pVnode); } -static void dnodeProcesSMDCreateVnodeMsg(SRpcMsg *rpcMsg) { +static void dnodeProcessCreateVnodeMsg(SRpcMsg *rpcMsg) { SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - SMDCreateVnodeMsg *pCreate = (SMDCreateVnodeMsg *) rpcMsg->pCont; - 
pCreate->vnode = htonl(pCreate->vnode); + SMDCreateVnodeMsg *pCreate = rpcMsg->pCont; pCreate->cfg.vgId = htonl(pCreate->cfg.vgId); pCreate->cfg.maxSessions = htonl(pCreate->cfg.maxSessions); pCreate->cfg.daysPerFile = htonl(pCreate->cfg.daysPerFile); - SVnodeObj *pVnodeObj = taosGetIntHashData(tsDnodeVnodesHash, pCreate->cfg.vgId); + dTrace("vgroup:%d, start to create vnode:%d in dnode", pCreate->cfg.vgId, pCreate->cfg.vgId); + + SVnodeObj *pVnodeObj = (SVnodeObj *) taosGetIntHashData(tsDnodeVnodesHash, pCreate->cfg.vgId); if (pVnodeObj != NULL) { rpcRsp.code = TSDB_CODE_SUCCESS; + dPrint("vgroup:%d, vnode is already exist", pCreate->cfg.vgId); } else { rpcRsp.code = dnodeCreateVnode(pCreate); } rpcSendResponse(&rpcRsp); - rpcFreeCont(rpcMsg->pCont); } -static void dnodeProcesSMDDropVnodeMsg(SRpcMsg *rpcMsg) { +static void dnodeProcessDropVnodeMsg(SRpcMsg *rpcMsg) { SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - SMDDropVnodeMsg *pDrop = (SMDCreateVnodeMsg *) rpcMsg->pCont; + SMDDropVnodeMsg *pDrop = rpcMsg->pCont; pDrop->vgId = htonl(pDrop->vgId); - SVnodeObj *pVnodeObj = taosGetIntHashData(tsDnodeVnodesHash, pDrop->vgId); + dTrace("vgroup:%d, start to drop vnode in dnode", pDrop->vgId); + + SVnodeObj *pVnodeObj = (SVnodeObj *) taosGetIntHashData(tsDnodeVnodesHash, pDrop->vgId); if (pVnodeObj != NULL) { dnodeDropVnode(pVnodeObj); rpcRsp.code = TSDB_CODE_SUCCESS; @@ -281,19 +389,19 @@ static void dnodeProcesSMDDropVnodeMsg(SRpcMsg *rpcMsg) { } rpcSendResponse(&rpcRsp); - rpcFreeCont(rpcMsg->pCont); } static void dnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg) { SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - SMDCreateVnodeMsg *pCreate = (SMDCreateVnodeMsg *) rpcMsg->pCont; - pCreate->vnode = htonl(pCreate->vnode); + SMDCreateVnodeMsg *pCreate = rpcMsg->pCont; pCreate->cfg.vgId = htonl(pCreate->cfg.vgId); pCreate->cfg.maxSessions = htonl(pCreate->cfg.maxSessions); 
pCreate->cfg.daysPerFile = htonl(pCreate->cfg.daysPerFile); - SVnodeObj *pVnodeObj = taosGetIntHashData(tsDnodeVnodesHash, pCreate->cfg.vgId); + dTrace("vgroup:%d, start to alter vnode in dnode", pCreate->cfg.vgId); + + SVnodeObj *pVnodeObj = (SVnodeObj *) taosGetIntHashData(tsDnodeVnodesHash, pCreate->cfg.vgId); if (pVnodeObj != NULL) { rpcRsp.code = TSDB_CODE_SUCCESS; } else { @@ -301,5 +409,123 @@ static void dnodeProcessAlterVnodeMsg(SRpcMsg *rpcMsg) { } rpcSendResponse(&rpcRsp); - rpcFreeCont(rpcMsg->pCont); } + +static void dnodeProcessAlterStreamMsg(SRpcMsg *pMsg) { +// SMDAlterStreamMsg *pStream = pCont; +// pStream->uid = htobe64(pStream->uid); +// pStream->stime = htobe64(pStream->stime); +// pStream->vnode = htonl(pStream->vnode); +// pStream->sid = htonl(pStream->sid); +// pStream->status = htonl(pStream->status); +// +// int32_t code = dnodeCreateStream(pStream); +} + +static void dnodeProcessConfigDnodeMsg(SRpcMsg *pMsg) { + SMDCfgDnodeMsg *pCfg = (SMDCfgDnodeMsg *)pMsg->pCont; + int32_t code = tsCfgDynamicOptions(pCfg->config); + + SRpcMsg rpcRsp = {.handle = pMsg->handle, .pCont = NULL, .contLen = 0, .code = code, .msgType = 0}; + rpcSendResponse(&rpcRsp); +} + +static void dnodeBuildVloadMsg(char *pNode, void * param) { + SVnodeObj *pVnode = (SVnodeObj *) pNode; + if (pVnode->dirty) return; + + SDMStatusMsg *pStatus = param; + if (pStatus->openVnodes >= TSDB_MAX_VNODES) return; + + SVnodeLoad *pLoad = &pStatus->load[pStatus->openVnodes++]; + pLoad->vgId = htonl(pVnode->vgId); + pLoad->vnode = htonl(pVnode->vgId); + pLoad->status = pVnode->status; +} + +static void dnodeSendStatusMsg(void *handle, void *tmrId) { + if (tsDnodeTmr == NULL) { + dError("dnode timer is already released"); + return; + } + + if (tsStatusTimer == NULL) { + taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer); + dError("failed to start status timer"); + return; + } + + int32_t contLen = sizeof(SDMStatusMsg) + TSDB_MAX_VNODES * 
sizeof(SVnodeLoad); + SDMStatusMsg *pStatus = rpcMallocCont(contLen); + if (pStatus == NULL) { + taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer); + dError("failed to malloc status message"); + return; + } + + strcpy(pStatus->dnodeName, tsDnodeName); + pStatus->version = htonl(tsVersion); + pStatus->dnodeId = htonl(tsDnodeId); + pStatus->privateIp = htonl(inet_addr(tsPrivateIp)); + pStatus->publicIp = htonl(inet_addr(tsPublicIp)); + pStatus->lastReboot = htonl(tsRebootTime); + pStatus->numOfTotalVnodes = htons((uint16_t) tsNumOfTotalVnodes); + pStatus->numOfCores = htons((uint16_t) tsNumOfCores); + pStatus->diskAvailable = tsAvailDataDirGB; + pStatus->alternativeRole = (uint8_t) tsAlternativeRole; + + taosVisitIntHashWithFp(tsDnodeVnodesHash, dnodeBuildVloadMsg, pStatus); + contLen = sizeof(SDMStatusMsg) + pStatus->openVnodes * sizeof(SVnodeLoad); + pStatus->openVnodes = htons(pStatus->openVnodes); + + SRpcMsg rpcMsg = { + .pCont = pStatus, + .contLen = contLen, + .msgType = TSDB_MSG_TYPE_DM_STATUS + }; + + dnodeSendMsgToMnode(&rpcMsg); + taosTmrReset(dnodeSendStatusMsg, tsStatusInterval * 1000, NULL, tsDnodeTmr, &tsStatusTimer); +} + +static void dnodeReadDnodeId() { + char dnodeIdFile[TSDB_FILENAME_LEN] = {0}; + sprintf(dnodeIdFile, "%s/dnodeId", tsDnodeDir); + + FILE *fp = fopen(dnodeIdFile, "r"); + if (!fp) return; + + char option[32] = {0}; + int32_t value = 0; + int32_t num = 0; + + num = fscanf(fp, "%s %d", option, &value); + if (num != 2) return; + if (strcmp(option, "dnodeId") != 0) return; + tsDnodeId = value;; + + fclose(fp); + dPrint("read dnodeId:%d successed", tsDnodeId); +} + +static void dnodeSaveDnodeId() { + char dnodeIdFile[TSDB_FILENAME_LEN] = {0}; + sprintf(dnodeIdFile, "%s/dnodeId", tsDnodeDir); + + FILE *fp = fopen(dnodeIdFile, "w"); + if (!fp) return; + + fprintf(fp, "dnodeId %d\n", tsDnodeId); + + fclose(fp); + dPrint("save dnodeId successed"); +} + +void dnodeUpdateDnodeId(int32_t dnodeId) { + if 
(tsDnodeId == 0) { + dPrint("dnodeId is set to %d", dnodeId); + tsDnodeId = dnodeId; + dnodeSaveDnodeId(); + } +} + diff --git a/src/dnode/src/dnodeMnode.c b/src/dnode/src/dnodeMnode.c index a374dbe2934502969866943ac8d1803e3ecf4578..9d1be0148ebf3e011ec0e649e38d2cf402b42d2e 100644 --- a/src/dnode/src/dnodeMnode.c +++ b/src/dnode/src/dnodeMnode.c @@ -17,7 +17,7 @@ #include "taosmsg.h" #include "tlog.h" #include "trpc.h" -#include "dnodeSystem.h" +#include "dnode.h" #include "dnodeMgmt.h" #include "dnodeWrite.h" @@ -27,57 +27,62 @@ static void *tsDnodeMnodeRpc = NULL; int32_t dnodeInitMnode() { dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CREATE_TABLE] = dnodeWrite; - dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = dnodeWrite; - dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = dnodeMgmt; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = dnodeWrite; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_ALTER_TABLE] = dnodeWrite; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_STABLE] = dnodeWrite; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CREATE_VNODE] = dnodeMgmt; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_DROP_VNODE] = dnodeMgmt; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_ALTER_VNODE] = dnodeMgmt; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_ALTER_STREAM] = dnodeMgmt; + dnodeProcessMgmtMsgFp[TSDB_MSG_TYPE_MD_CONFIG_DNODE] = dnodeMgmt; SRpcInit rpcInit; memset(&rpcInit, 0, sizeof(rpcInit)); rpcInit.localIp = tsAnyIp ? 
"0.0.0.0" : tsPrivateIp; - - // note: a new port shall be assigned - // rpcInit.localPort = tsDnodeMnodePort; - rpcInit.label = "DND-mgmt"; + rpcInit.localPort = tsDnodeMnodePort; + rpcInit.label = "DND-MS"; rpcInit.numOfThreads = 1; - rpcInit.cfp = dnodeProcessMsgFromMnode; - rpcInit.sessions = TSDB_SESSIONS_PER_DNODE; + rpcInit.cfp = dnodeProcessMsgFromMnode; + rpcInit.sessions = 100; rpcInit.connType = TAOS_CONN_SERVER; - rpcInit.idleTime = tsShellActivityTimer * 1500; + rpcInit.idleTime = tsShellActivityTimer * 2000; tsDnodeMnodeRpc = rpcOpen(&rpcInit); if (tsDnodeMnodeRpc == NULL) { - dError("failed to init connection from mgmt"); + dError("failed to init mnode rpc server"); return -1; } - dPrint("connection to mgmt is opened"); + dPrint("mnode rpc server is opened"); return 0; } void dnodeCleanupMnode() { if (tsDnodeMnodeRpc) { rpcClose(tsDnodeMnodeRpc); + tsDnodeMnodeRpc = NULL; + dPrint("mnode rpc server is closed"); } } static void dnodeProcessMsgFromMnode(SRpcMsg *pMsg) { SRpcMsg rspMsg; - - rspMsg.handle = pMsg->handle; - rspMsg.pCont = NULL; + rspMsg.handle = pMsg->handle; + rspMsg.pCont = NULL; rspMsg.contLen = 0; if (dnodeGetRunStatus() != TSDB_DNODE_RUN_STATUS_RUNING) { rspMsg.code = TSDB_CODE_NOT_READY; rpcSendResponse(&rspMsg); rpcFreeCont(pMsg->pCont); - dTrace("conn:%p, query msg is ignored since dnode not running", pMsg->handle); + dTrace("thandle:%p, query msg is ignored since dnode not running", pMsg->handle); return; } if (dnodeProcessMgmtMsgFp[pMsg->msgType]) { (*dnodeProcessMgmtMsgFp[pMsg->msgType])(pMsg); } else { - dError("%s is not processed", taosMsg[pMsg->msgType]); + dError("%s is not processed in mserver", taosMsg[pMsg->msgType]); rspMsg.code = TSDB_CODE_MSG_NOT_PROCESSED; rpcSendResponse(&rspMsg); rpcFreeCont(pMsg->pCont); diff --git a/src/dnode/src/dnodeModule.c b/src/dnode/src/dnodeModule.c index dd4678802f8129d6b55cc01e12b8145394121691..7a2facb255c784af0b0856c24abb856a2a33eed8 100644 --- a/src/dnode/src/dnodeModule.c +++ 
b/src/dnode/src/dnodeModule.c @@ -22,9 +22,9 @@ #include "http.h" #include "monitor.h" #include "dnodeModule.h" -#include "dnodeSystem.h" +#include "dnode.h" -void dnodeAllocModules() { +static void dnodeAllocModules() { tsModule[TSDB_MOD_MGMT].name = "mgmt"; tsModule[TSDB_MOD_MGMT].initFp = mgmtInitSystem; tsModule[TSDB_MOD_MGMT].cleanUpFp = mgmtCleanUpSystem; @@ -69,10 +69,12 @@ void dnodeCleanUpModules() { } int32_t dnodeInitModules() { + dnodeAllocModules(); + for (int mod = 0; mod < TSDB_MOD_MAX; ++mod) { if (tsModule[mod].num != 0 && tsModule[mod].initFp) { if ((*tsModule[mod].initFp)() != 0) { - dError("TDengine initialization failed"); + dError("failed to init modules"); return -1; } } @@ -81,14 +83,45 @@ int32_t dnodeInitModules() { return TSDB_CODE_SUCCESS; } -void dnodeStartModulesImp() { - for (int mod = 1; mod < TSDB_MOD_MAX; ++mod) { - if (tsModule[mod].num != 0 && tsModule[mod].startFp) { - if ((*tsModule[mod].startFp)() != 0) { - dError("failed to start module:%d", mod); +void dnodeStartModules() { + // for (int mod = 1; mod < TSDB_MOD_MAX; ++mod) { + // if (tsModule[mod].num != 0 && tsModule[mod].startFp) { + // if ((*tsModule[mod].startFp)() != 0) { + // dError("failed to start module:%d", mod); + // } + // } + // } +} + +void dnodeProcessModuleStatus(uint32_t moduleStatus) { + if (moduleStatus == tsModuleStatus) return; + + dPrint("module status is received, old:%d, new:%d", tsModuleStatus, moduleStatus); + + int news = moduleStatus; + int olds = tsModuleStatus; + + for (int moduleType = 0; moduleType < TSDB_MOD_MAX; ++moduleType) { + int newStatus = news & (1 << moduleType); + int oldStatus = olds & (1 << moduleType); + + if (oldStatus > 0) { + if (newStatus == 0) { + if (tsModule[moduleType].stopFp) { + dPrint("module:%s is stopped on this node", tsModule[moduleType].name); + (*tsModule[moduleType].stopFp)(); + } + } + } else if (oldStatus == 0) { + if (newStatus > 0) { + if (tsModule[moduleType].startFp) { + dPrint("module:%s is started on 
this node", tsModule[moduleType].name); + (*tsModule[moduleType].startFp)(); + } } + } else { } } -} -void (*dnodeStartModules)() = dnodeStartModulesImp; + tsModuleStatus = moduleStatus; +} diff --git a/src/dnode/src/dnodeRead.c b/src/dnode/src/dnodeRead.c index 1bbc65ef5c6359a19652fdc7eda160f31a255ccc..11cb845798b13f8789a09876bd81082eb1691d6c 100644 --- a/src/dnode/src/dnodeRead.c +++ b/src/dnode/src/dnodeRead.c @@ -15,13 +15,16 @@ #define _DEFAULT_SOURCE #include "os.h" + #include "taoserror.h" #include "taosmsg.h" #include "tlog.h" #include "tqueue.h" #include "trpc.h" -#include "dnodeRead.h" + #include "dnodeMgmt.h" +#include "dnodeRead.h" +#include "queryExecutor.h" typedef struct { int32_t code; @@ -59,54 +62,73 @@ int32_t dnodeInitRead() { maxThreads = tsNumOfCores*tsNumOfThreadsPerCore; if (maxThreads <= minThreads*2) maxThreads = 2*minThreads; + dPrint("dnode read is opened"); return 0; } void dnodeCleanupRead() { taosCloseQset(readQset); + dPrint("dnode read is closed"); } -void dnodeRead(void *rpcMsg) { - SRpcMsg *pMsg = rpcMsg; - +void dnodeRead(SRpcMsg *pMsg) { + int32_t queuedMsgNum = 0; int32_t leftLen = pMsg->contLen; char *pCont = (char *) pMsg->pCont; - int32_t contLen = 0; - int32_t numOfVnodes = 0; - int32_t vgId = 0; SRpcContext *pRpcContext = NULL; - // parse head, get number of vnodes; - if ( numOfVnodes > 1) { - pRpcContext = calloc(sizeof(SRpcContext), 1); - pRpcContext->numOfVnodes = 1; +// SMsgDesc *pDesc = pCont; +// pDesc->numOfVnodes = htonl(pDesc->numOfVnodes); +// pCont += sizeof(SMsgDesc); +// if (pDesc->numOfVnodes > 1) { +// pRpcContext = calloc(sizeof(SRpcContext), 1); +// pRpcContext->numOfVnodes = pDesc->numOfVnodes; +// } + if (pMsg->msgType == TSDB_MSG_TYPE_RETRIEVE) { + queuedMsgNum = 0; } while (leftLen > 0) { - // todo: parse head, get vgId, contLen + SMsgHead *pHead = (SMsgHead *) pCont; + pHead->vgId = 1;//htonl(pHead->vgId); + pHead->contLen = pMsg->contLen; //htonl(pHead->contLen); - // get pVnode from vgId - void 
*pVnode = dnodeGetVnode(vgId); + void *pVnode = dnodeGetVnode(pHead->vgId); if (pVnode == NULL) { + leftLen -= pHead->contLen; + pCont += pHead->contLen; /* skip this vnode's message: the cursor must move forward */ continue; } // put message into queue - SReadMsg *pReadMsg = taosAllocateQitem(sizeof(SReadMsg)); - pReadMsg->rpcMsg = *pMsg; - pReadMsg->pCont = pCont; - pReadMsg->contLen = contLen; - pReadMsg->pRpcContext = pRpcContext; + SReadMsg readMsg; + readMsg.rpcMsg = *pMsg; + readMsg.pCont = pCont; + readMsg.contLen = pHead->contLen; + readMsg.pRpcContext = pRpcContext; + readMsg.pVnode = pVnode; taos_queue queue = dnodeGetVnodeRworker(pVnode); - taosWriteQitem(queue, 0, pReadMsg); + taosWriteQitem(queue, &readMsg); /* pReadMsg no longer exists; enqueue the stack item like dnodeWrite does */ // next vnode - leftLen -= contLen; - pCont -= contLen; + leftLen -= pHead->contLen; + pCont += pHead->contLen; /* advance to the next message head */ + queuedMsgNum++; dnodeReleaseVnode(pVnode); } + + if (queuedMsgNum == 0) { + SRpcMsg rpcRsp = { + .handle = pMsg->handle, + .pCont = NULL, + .contLen = 0, + .code = TSDB_CODE_INVALID_VGROUP_ID, + .msgType = 0 + }; + rpcSendResponse(&rpcRsp); + } } void *dnodeAllocateReadWorker(void *pVnode) { @@ -205,10 +227,80 @@ static void dnodeProcessReadResult(void *pVnode, SReadMsg *pRead) { rpcFreeCont(pRead->rpcMsg.pCont); // free the received message } -static void dnodeProcessQueryMsg(void *pVnode, SReadMsg *pMsg) { - +static void dnodeProcessQueryMsg(SReadMsg *pMsg) { + SQueryTableMsg* pQueryTableMsg = (SQueryTableMsg*) pMsg->pCont; + + SQInfo* pQInfo = NULL; + int32_t code = qCreateQueryInfo(pQueryTableMsg, &pQInfo); + + SQueryTableRsp *pRsp = (SQueryTableRsp *) rpcMallocCont(sizeof(SQueryTableRsp)); + pRsp->code = code; + pRsp->qhandle = htobe64((uint64_t) (pQInfo)); + + SRpcMsg rpcRsp = { + .handle = pMsg->rpcMsg.handle, + .pCont = pRsp, + .contLen = sizeof(SQueryTableRsp), + .code = code, + .msgType = 0 + }; + + // do execute query + qTableQuery(pQInfo); + + rpcSendResponse(&rpcRsp); } -static void dnodeProcessRetrieveMsg(void *pVnode, SReadMsg *pMsg) { - +static void dnodeProcessRetrieveMsg(SReadMsg *pMsg) { + SRetrieveTableMsg
*pRetrieve = pMsg->pCont; + void *pQInfo = htobe64(pRetrieve->qhandle); + + dTrace("retrieve msg is disposed, qInfo:%p", pQInfo); + + int32_t rowSize = 0; + int32_t numOfRows = 0; + int32_t contLen = 0; + + SRpcMsg rpcRsp = {0}; + + int32_t code = qRetrieveQueryResultInfo(pQInfo, &numOfRows, &rowSize); + if (code != TSDB_CODE_SUCCESS) { + contLen = sizeof(SRetrieveTableRsp); + + SRetrieveTableRsp *pRsp = (SRetrieveTableRsp *)rpcMallocCont(contLen); + pRsp->numOfRows = 0; + pRsp->precision = 0; + pRsp->offset = 0; + pRsp->useconds = 0; + + rpcRsp = (SRpcMsg) { + .handle = pMsg->rpcMsg.handle, + .pCont = pRsp, + .contLen = contLen, + .code = code, + .msgType = 0 + }; + + //todo free qinfo + } else { + contLen = 100; + + SRetrieveTableRsp *pRsp = (SRetrieveTableRsp *)rpcMallocCont(contLen); + pRsp->numOfRows = 0; + pRsp->precision = 0; + pRsp->offset = 0; + pRsp->useconds = 0; + + *(int64_t*) pRsp->data = 1000; + + rpcRsp = (SRpcMsg) { + .handle = pMsg->rpcMsg.handle, + .pCont = pRsp, + .contLen = contLen, + .code = code, + .msgType = 0 + }; + } + + rpcSendResponse(&rpcRsp); } diff --git a/src/dnode/src/dnodeService.c b/src/dnode/src/dnodeService.c deleted file mode 100644 index a8a110ab4cff681d79fb68782b6ab782bb3820c2..0000000000000000000000000000000000000000 --- a/src/dnode/src/dnodeService.c +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "tlog.h" -#include "tglobalcfg.h" -#include "dnodeSystem.h" - -void (*dnodeParseParameterKFp)() = NULL; - -/* - * Termination handler - */ -void signal_handler(int signum, siginfo_t *sigInfo, void *context) { - if (signum == SIGUSR1) { - tsCfgDynamicOptions("debugFlag 135"); - return; - } - if (signum == SIGUSR2) { - tsCfgDynamicOptions("resetlog"); - return; - } - syslog(LOG_INFO, "Shut down signal is %d", signum); - syslog(LOG_INFO, "Shutting down TDengine service..."); - // clean the system. - dPrint("shut down signal is %d, sender PID:%d", signum, sigInfo->si_pid); - dnodeCleanUpSystem(); - // close the syslog - syslog(LOG_INFO, "Shut down TDengine service successfully"); - dPrint("TDengine is shut down!"); - closelog(); - exit(EXIT_SUCCESS); -} - -int main(int argc, char *argv[]) { - dnodeInitPlugins(); - - // Set global configuration file - for (int i = 1; i < argc; ++i) { - if (strcmp(argv[i], "-c") == 0) { - if (i < argc - 1) { - strcpy(configDir, argv[++i]); - } else { - printf("'-c' requires a parameter, default:%s\n", configDir); - exit(EXIT_FAILURE); - } - } else if (strcmp(argv[i], "-V") == 0) { - char *versionStr = tsIsCluster ? 
"enterprise" : "community"; - printf("%s version: %s compatible_version: %s\n", versionStr, version, compatible_version); - printf("gitinfo: %s\n", gitinfo); - printf("gitinfoI: %s\n", gitinfoOfInternal); - printf("buildinfo: %s\n", buildinfo); - return 0; - } else if (strcmp(argv[i], "-k") == 0) { - if (dnodeParseParameterKFp) { - dnodeParseParameterKFp(); - } -#ifdef TAOS_MEM_CHECK - } else if (strcmp(argv[i], "--alloc-random-fail") == 0) { - if ((i < argc - 1) && (argv[i+1][0] != '-')) { - taosSetAllocMode(TAOS_ALLOC_MODE_RANDOM_FAIL, argv[++i], true); - } else { - taosSetAllocMode(TAOS_ALLOC_MODE_RANDOM_FAIL, NULL, true); - } - } else if (strcmp(argv[i], "--detect-mem-leak") == 0) { - if ((i < argc - 1) && (argv[i+1][0] != '-')) { - taosSetAllocMode(TAOS_ALLOC_MODE_DETECT_LEAK, argv[++i], true); - } else { - taosSetAllocMode(TAOS_ALLOC_MODE_DETECT_LEAK, NULL, true); - } -#endif - } - } - - /* Set termination handler. */ - struct sigaction act; - act.sa_flags = SA_SIGINFO; - act.sa_sigaction = signal_handler; - sigaction(SIGTERM, &act, NULL); - sigaction(SIGHUP, &act, NULL); - sigaction(SIGINT, &act, NULL); - sigaction(SIGUSR1, &act, NULL); - sigaction(SIGUSR2, &act, NULL); - // sigaction(SIGABRT, &act, NULL); - - // Open /var/log/syslog file to record information. 
- openlog("TDengine:", LOG_PID | LOG_CONS | LOG_NDELAY, LOG_LOCAL1); - syslog(LOG_INFO, "Starting TDengine service..."); - - // Initialize the system - if (dnodeInitSystem() < 0) { - syslog(LOG_ERR, "Error initialize TDengine system"); - closelog(); - - dnodeCleanUpSystem(); - exit(EXIT_FAILURE); - } - - syslog(LOG_INFO, "Started TDengine service successfully."); - - while (1) { - sleep(1000); - } -} - - diff --git a/src/dnode/src/dnodeShell.c b/src/dnode/src/dnodeShell.c index 6cf3cf4df96181dd99ea13673f5081bc376c5aef..951d43c34b88a2da659aa7429f37defb50837fb8 100644 --- a/src/dnode/src/dnodeShell.c +++ b/src/dnode/src/dnodeShell.c @@ -20,18 +20,21 @@ #include "taosmsg.h" #include "tlog.h" #include "trpc.h" -#include "dnodeSystem.h" +#include "dnode.h" #include "dnodeRead.h" #include "dnodeWrite.h" #include "dnodeShell.h" static void (*dnodeProcessShellMsgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *); static void dnodeProcessMsgFromShell(SRpcMsg *pMsg); +static int dnodeRetrieveUserAuthInfo(char *user, char *spi, char *encrypt, char *secret, char *ckey); static void *tsDnodeShellRpc = NULL; +static int32_t tsDnodeQueryReqNum = 0; +static int32_t tsDnodeSubmitReqNum = 0; int32_t dnodeInitShell() { - dnodeProcessShellMsgFp[TSDB_MSG_TYPE_SUBMIT] = dnodeWrite; - dnodeProcessShellMsgFp[TSDB_MSG_TYPE_QUERY] = dnodeRead; + dnodeProcessShellMsgFp[TSDB_MSG_TYPE_SUBMIT] = dnodeWrite; + dnodeProcessShellMsgFp[TSDB_MSG_TYPE_QUERY] = dnodeRead; dnodeProcessShellMsgFp[TSDB_MSG_TYPE_RETRIEVE] = dnodeRead; int numOfThreads = tsNumOfCores * tsNumOfThreadsPerCore; @@ -43,33 +46,34 @@ int32_t dnodeInitShell() { SRpcInit rpcInit; memset(&rpcInit, 0, sizeof(rpcInit)); rpcInit.localIp = tsAnyIp ? 
"0.0.0.0" : tsPrivateIp; - rpcInit.localPort = tsVnodeShellPort; + rpcInit.localPort = tsDnodeShellPort; rpcInit.label = "DND-shell"; rpcInit.numOfThreads = numOfThreads; - rpcInit.cfp = dnodeProcessMsgFromShell; + rpcInit.cfp = dnodeProcessMsgFromShell; rpcInit.sessions = TSDB_SESSIONS_PER_DNODE; rpcInit.connType = TAOS_CONN_SERVER; rpcInit.idleTime = tsShellActivityTimer * 1500; + rpcInit.afp = dnodeRetrieveUserAuthInfo; tsDnodeShellRpc = rpcOpen(&rpcInit); if (tsDnodeShellRpc == NULL) { - dError("failed to init connection from shell"); + dError("failed to init shell rpc server"); return -1; } - dPrint("connection to shell is opened"); + dPrint("shell rpc server is opened"); return 0; } void dnodeCleanupShell() { if (tsDnodeShellRpc) { rpcClose(tsDnodeShellRpc); + tsDnodeShellRpc = NULL; } } void dnodeProcessMsgFromShell(SRpcMsg *pMsg) { SRpcMsg rpcMsg; - rpcMsg.handle = pMsg->handle; rpcMsg.pCont = NULL; rpcMsg.contLen = 0; @@ -82,6 +86,12 @@ void dnodeProcessMsgFromShell(SRpcMsg *pMsg) { return; } + if (pMsg->msgType == TSDB_MSG_TYPE_QUERY) { + atomic_fetch_add_32(&tsDnodeQueryReqNum, 1); + } else if (pMsg->msgType == TSDB_MSG_TYPE_SUBMIT) { + atomic_fetch_add_32(&tsDnodeSubmitReqNum, 1); + } else {} + if ( dnodeProcessShellMsgFp[pMsg->msgType] ) { (*dnodeProcessShellMsgFp[pMsg->msgType])(pMsg); } else { @@ -92,4 +102,17 @@ void dnodeProcessMsgFromShell(SRpcMsg *pMsg) { } } +static int dnodeRetrieveUserAuthInfo(char *user, char *spi, char *encrypt, char *secret, char *ckey) { + return TSDB_CODE_SUCCESS; +} + +SDnodeStatisInfo dnodeGetStatisInfo() { + SDnodeStatisInfo info = {0}; + if (dnodeGetRunStatus() == TSDB_DNODE_RUN_STATUS_RUNING) { + //info.httpReqNum = httpGetReqCount(); + info.queryReqNum = atomic_exchange_32(&tsDnodeQueryReqNum, 0); + info.submitReqNum = atomic_exchange_32(&tsDnodeSubmitReqNum, 0); + } + return info; +} diff --git a/src/dnode/src/dnodeSystem.c b/src/dnode/src/dnodeSystem.c deleted file mode 100644 index 
be3e03a72c5f1bf298de262f0168842f0fc1ab2b..0000000000000000000000000000000000000000 --- a/src/dnode/src/dnodeSystem.c +++ /dev/null @@ -1,304 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#define _DEFAULT_SOURCE -#include "os.h" -#include "taosdef.h" -#include "taoserror.h" -#include "tcrc32c.h" -#include "tlog.h" -#include "tmodule.h" -#include "tsched.h" -#include "ttime.h" -#include "ttimer.h" -#include "tutil.h" -#include "http.h" -#include "trpc.h" -#include "dnode.h" -#include "dnodeMgmt.h" -#include "dnodeModule.h" -#include "dnodeShell.h" -#include "dnodeSystem.h" - -#ifdef CLUSTER -#include "account.h" -#include "admin.h" -#include "balance.h" -#include "cluster.h" -#include "grant.h" -#include "mpeer.h" -#include "storage.h" -#include "vpeer.h" -#endif - -static pthread_mutex_t tsDnodeMutex; -static SDnodeRunStatus tsDnodeRunStatus = TSDB_DNODE_RUN_STATUS_STOPPED; - -static int32_t dnodeInitRpcQHandle(); -static int32_t dnodeInitQueryQHandle(); -static int32_t dnodeInitTmrCtl(); - - -int32_t (*dnodeInitStorage)() = NULL; -void (*dnodeCleanupStorage)() = NULL; -int32_t (*dnodeInitPeers)(int32_t numOfThreads) = NULL; - -void *tsDnodeTmr; -void **tsRpcQhandle; -void *tsDnodeMgmtQhandle; -void *tsQueryQhandle; -int32_t tsVnodePeers = TSDB_VNODES_SUPPORT - 1; -int32_t tsMaxQueues; -uint32_t tsRebootTime; - -static void dnodeInitVnodesLock() { - pthread_mutex_init(&tsDnodeMutex, NULL); -} - -void dnodeLockVnodes() { 
- pthread_mutex_lock(&tsDnodeMutex); -} - -void dnodeUnLockVnodes() { - pthread_mutex_unlock(&tsDnodeMutex); -} - -static void dnodeCleanVnodesLock() { - pthread_mutex_destroy(&tsDnodeMutex); -} - -SDnodeRunStatus dnodeGetRunStatus() { - return tsDnodeRunStatus; -} - -void dnodeSetRunStatus(SDnodeRunStatus status) { - tsDnodeRunStatus = status; -} - -void dnodeCleanUpSystem() { - tclearModuleStatus(TSDB_MOD_MGMT); - - if (dnodeGetRunStatus() == TSDB_DNODE_RUN_STATUS_STOPPED) { - return; - } else { - dnodeSetRunStatus(TSDB_DNODE_RUN_STATUS_STOPPED); - } - - dnodeCleanupShell(); - dnodeCleanUpModules(); - dnodeCleanupMgmt(); - taosCloseLogger(); - dnodeCleanupStorage(); - dnodeCleanVnodesLock(); -} - -void dnodeCheckDataDirOpenned(const char *dir) { - char filepath[256] = {0}; - sprintf(filepath, "%s/.running", dir); - int32_t fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); - int32_t ret = flock(fd, LOCK_EX | LOCK_NB); - if (ret != 0) { - dError("failed to lock file:%s ret:%d, database may be running, quit", filepath, ret); - exit(0); - } -} - -void dnodeInitPlugins() { -#ifdef CLUSTER -// acctInit(); -// adminInit(); -// balanceInit(); -// clusterInit(); -// grantInit(); -// mpeerInit(); -// storageInit(); -#endif -} - -int32_t dnodeInitSystem() { - tsRebootTime = taosGetTimestampSec(); - tscEmbedded = 1; - - dnodeSetRunStatus(TSDB_DNODE_RUN_STATUS_INITIALIZE); - taosResolveCRC(); - - // Read global configuration. 
- tsReadGlobalLogConfig(); - - struct stat dirstat; - if (stat(logDir, &dirstat) < 0) { - mkdir(logDir, 0755); - } - - char temp[128]; - sprintf(temp, "%s/taosdlog", logDir); - if (taosInitLog(temp, tsNumOfLogLines, 1) < 0) { - printf("failed to init log file\n"); - } - - if (!tsReadGlobalConfig()) { - tsPrintGlobalConfig(); - dError("TDengine read global config failed"); - return -1; - } - - if (dnodeInitStorage() != 0) { - dError("TDengine init tier directory failed"); - return -1; - } - -// dnodeInitMgmtIp(); - - tsPrintGlobalConfig(); - - dPrint("Server IP address is:%s", tsPrivateIp); - - taosSetCoreDump(); - - signal(SIGPIPE, SIG_IGN); - - dnodeAllocModules(); - - dnodeInitVnodesLock(); - - dPrint("starting to initialize TDengine ..."); - - if (dnodeInitRpcQHandle() < 0) { - dError("failed to init query qhandle, exit"); - return -1; - } - - if (dnodeCheckSystem() < 0) { - return -1; - } - - if (dnodeInitModules() < 0) { - return -1; - } - - if (dnodeInitTmrCtl() < 0) { - dError("failed to init timer, exit"); - return -1; - } - - if (dnodeInitQueryQHandle() < 0) { - dError("failed to init query qhandle, exit"); - return -1; - } - - if (dnodeInitMgmt() < 0) { - dError("failed to init vnode storage"); - return -1; - } - - int32_t numOfThreads = (1.0 - tsRatioOfQueryThreads) * tsNumOfCores * tsNumOfThreadsPerCore / 2.0; - if (numOfThreads < 1) numOfThreads = 1; - if (dnodeInitPeers(numOfThreads) < 0) { - dError("failed to init vnode peer communication"); - return -1; - } - - if (dnodeInitMgmt() < 0) { - dError("failed to init communication to mgmt"); - return -1; - } - - if (dnodeInitShell() < 0) { - dError("failed to init communication to shell"); - return -1; - } - - dnodeStartModules(); - - dnodeSetRunStatus(TSDB_DNODE_RUN_STATUS_RUNING); - - dPrint("TDengine is initialized successfully"); - - return 0; -} - -int32_t dnodeInitStorageImp() { - struct stat dirstat; - strcpy(tsDirectory, dataDir); - if (stat(dataDir, &dirstat) < 0) { - mkdir(dataDir, 0755); - } - 
- char fileName[128]; - - sprintf(fileName, "%s/tsdb", tsDirectory); - mkdir(fileName, 0755); - - sprintf(fileName, "%s/data", tsDirectory); - mkdir(fileName, 0755); - - sprintf(tsMgmtDirectory, "%s/mgmt", tsDirectory); - sprintf(tsDirectory, "%s/tsdb", dataDir); - dnodeCheckDataDirOpenned(dataDir); - - return 0; -} - -static int32_t dnodeInitQueryQHandle() { - int32_t numOfThreads = tsRatioOfQueryThreads * tsNumOfCores * tsNumOfThreadsPerCore; - if (numOfThreads < 1) { - numOfThreads = 1; - } - - int32_t maxQueueSize = tsNumOfVnodesPerCore * tsNumOfCores * tsSessionsPerVnode; - dTrace("query task queue initialized, max slot:%d, task threads:%d", maxQueueSize, numOfThreads); - - tsQueryQhandle = taosInitSchedulerWithInfo(maxQueueSize, numOfThreads, "query", tsDnodeTmr); - - return 0; -} - -static int32_t dnodeInitTmrCtl() { - tsDnodeTmr = taosTmrInit(TSDB_MAX_VNODES * (tsVnodePeers + 10) + tsSessionsPerVnode + 1000, 200, 60000, - "DND-vnode"); - if (tsDnodeTmr == NULL) { - dError("failed to init timer, exit"); - return -1; - } - - return 0; -} - -static int32_t dnodeInitRpcQHandle() { - tsMaxQueues = (1.0 - tsRatioOfQueryThreads) * tsNumOfCores * tsNumOfThreadsPerCore / 2.0; - if (tsMaxQueues < 1) { - tsMaxQueues = 1; - } - - tsRpcQhandle = malloc(tsMaxQueues * sizeof(void *)); - - for (int32_t i = 0; i < tsMaxQueues; ++i) { - tsRpcQhandle[i] = taosInitScheduler(tsSessionsPerVnode, 1, "dnode"); - } - - tsDnodeMgmtQhandle = taosInitScheduler(tsSessionsPerVnode, 1, "mgmt"); - - return 0; -} - -int32_t dnodeCheckSystemImp() { - return 0; -} - -int32_t (*dnodeCheckSystem)() = dnodeCheckSystemImp; - -int32_t dnodeInitPeersImp(int32_t numOfThreads) { - return 0; -} diff --git a/src/dnode/src/dnodeWrite.c b/src/dnode/src/dnodeWrite.c index 9e4acd6e1a582cc2b17451dec5907f30c778a0cc..8e5bcc5844d4af79c28aa2fa64869829ce8d1efe 100644 --- a/src/dnode/src/dnodeWrite.c +++ b/src/dnode/src/dnodeWrite.c @@ -20,6 +20,8 @@ #include "tlog.h" #include "tqueue.h" #include "trpc.h" 
+#include "tsdb.h" +#include "dataformat.h" #include "dnodeWrite.h" #include "dnodeMgmt.h" @@ -51,10 +53,12 @@ typedef struct _thread_obj { static void (*dnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MAX])(void *, SWriteMsg *); static void *dnodeProcessWriteQueue(void *param); static void dnodeHandleIdleWorker(SWriteWorker *pWorker); -static void dnodeProcessWriteResult(void *pVnode, SWriteMsg *pWrite); -static void dnodeProcessSubmitMsg(void *pVnode, SWriteMsg *pMsg); -static void dnodeProcessCreateTableMsg(void *pVnode, SWriteMsg *pMsg); -static void dnodeProcessDropTableMsg(void *pVnode, SWriteMsg *pMsg); +static void dnodeProcessWriteResult(SWriteMsg *pWrite); +static void dnodeProcessSubmitMsg(SWriteMsg *pMsg); +static void dnodeProcessCreateTableMsg(SWriteMsg *pMsg); +static void dnodeProcessDropTableMsg(SWriteMsg *pMsg); +static void dnodeProcessAlterTableMsg(SWriteMsg *pMsg); +static void dnodeProcessDropStableMsg(SWriteMsg *pMsg); SWriteWorkerPool wWorkerPool; @@ -62,6 +66,8 @@ int32_t dnodeInitWrite() { dnodeProcessWriteMsgFp[TSDB_MSG_TYPE_SUBMIT] = dnodeProcessSubmitMsg; dnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_CREATE_TABLE] = dnodeProcessCreateTableMsg; dnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_DROP_TABLE] = dnodeProcessDropTableMsg; + dnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_ALTER_TABLE] = dnodeProcessAlterTableMsg; + dnodeProcessWriteMsgFp[TSDB_MSG_TYPE_MD_DROP_STABLE] = dnodeProcessDropStableMsg; wWorkerPool.max = tsNumOfCores; wWorkerPool.writeWorker = (SWriteWorker *)calloc(sizeof(SWriteWorker), wWorkerPool.max); @@ -71,63 +77,84 @@ int32_t dnodeInitWrite() { wWorkerPool.writeWorker[i].workerId = i; } + dPrint("dnode write is opened"); return 0; } void dnodeCleanupWrite() { free(wWorkerPool.writeWorker); + dPrint("dnode write is closed"); } -void dnodeWrite(void *rpcMsg) { - SRpcMsg *pMsg = rpcMsg; - +void dnodeWrite(SRpcMsg *pMsg) { + int32_t queuedMsgNum = 0; int32_t leftLen = pMsg->contLen; char *pCont = (char *) pMsg->pCont; - int32_t contLen = 0; - int32_t 
numOfVnodes = 0; - int32_t vgId = 0; SRpcContext *pRpcContext = NULL; - // parse head, get number of vnodes; - - if ( numOfVnodes > 1) { - pRpcContext = calloc(sizeof(SRpcContext), 1); - pRpcContext->numOfVnodes = numOfVnodes; + if (pMsg->msgType == TSDB_MSG_TYPE_SUBMIT || pMsg->msgType == TSDB_MSG_TYPE_MD_DROP_STABLE) { + SMsgDesc *pDesc = (SMsgDesc *)pCont; + pDesc->numOfVnodes = htonl(pDesc->numOfVnodes); + pCont += sizeof(SMsgDesc); + if (pDesc->numOfVnodes > 1) { + pRpcContext = calloc(sizeof(SRpcContext), 1); + pRpcContext->numOfVnodes = pDesc->numOfVnodes; + } } while (leftLen > 0) { - // todo: parse head, get vgId, contLen + SMsgHead *pHead = (SMsgHead *) pCont; + pHead->vgId = htonl(pHead->vgId); + pHead->contLen = htonl(pHead->contLen); - // get pVnode from vgId - void *pVnode = dnodeGetVnode(vgId); + void *pVnode = dnodeGetVnode(pHead->vgId); if (pVnode == NULL) { - + leftLen -= pHead->contLen; + pCont += pHead->contLen; /* skip this vnode's message: the cursor must move forward */ continue; } // put message into queue - SWriteMsg *pWriteMsg = taosAllocateQitem(sizeof(SWriteMsg)); - pWriteMsg->rpcMsg = *pMsg; - pWriteMsg->pCont = pCont; - pWriteMsg->contLen = contLen; - pWriteMsg->pRpcContext = pRpcContext; + SWriteMsg writeMsg; + writeMsg.rpcMsg = *pMsg; + writeMsg.pCont = pCont; + writeMsg.contLen = pHead->contLen; + writeMsg.pRpcContext = pRpcContext; + writeMsg.pVnode = pVnode; // pVnode shall be saved for usage later taos_queue queue = dnodeGetVnodeWworker(pVnode); - taosWriteQitem(queue, 0, pWriteMsg); - + taosWriteQitem(queue, &writeMsg); + // next vnode - leftLen -= contLen; - pCont -= contLen; + leftLen -= pHead->contLen; + pCont += pHead->contLen; /* advance to the next message head */ + queuedMsgNum++; + } + + if (queuedMsgNum == 0) { + SRpcMsg rpcRsp = { + .handle = pMsg->handle, + .pCont = NULL, + .contLen = 0, + .code = TSDB_CODE_INVALID_VGROUP_ID, + .msgType = 0 + }; + rpcSendResponse(&rpcRsp); } } void *dnodeAllocateWriteWorker(void *pVnode) { SWriteWorker *pWorker = wWorkerPool.writeWorker + wWorkerPool.nextId; + taos_queue *queue =
taosOpenQueue(sizeof(SWriteMsg)); + if (queue == NULL) return NULL; if (pWorker->qset == NULL) { pWorker->qset = taosOpenQset(); if (pWorker->qset == NULL) return NULL; + taosAddIntoQset(pWorker->qset, queue); + wWorkerPool.nextId = (wWorkerPool.nextId + 1) % wWorkerPool.max; + pthread_attr_t thAttr; pthread_attr_init(&thAttr); pthread_attr_setdetachstate(&thAttr, PTHREAD_CREATE_JOINABLE); @@ -136,14 +163,11 @@ void *dnodeAllocateWriteWorker(void *pVnode) { dError("failed to create thread to process read queue, reason:%s", strerror(errno)); taosCloseQset(pWorker->qset); } - } - - taos_queue *queue = taosOpenQueue(); - if (queue) { - taosAddIntoQset(pWorker->qset, queue, pVnode); + } else { + taosAddIntoQset(pWorker->qset, queue); wWorkerPool.nextId = (wWorkerPool.nextId + 1) % wWorkerPool.max; } - + return queue; } @@ -244,14 +268,177 @@ static void dnodeHandleIdleWorker(SWriteWorker *pWorker) { } } -static void dnodeProcessSubmitMsg(void *param, SWriteMsg *pMsg) { +static void dnodeProcessSubmitMsg(SWriteMsg *pMsg) { + dTrace("submit msg is disposed"); + + SShellSubmitRspMsg *pRsp = rpcMallocCont(sizeof(SShellSubmitRspMsg)); + pRsp->code = 0; + pRsp->numOfRows = htonl(1); + pRsp->affectedRows = htonl(1); + pRsp->numOfFailedBlocks = 0; + + // todo write to tsdb + + SRpcMsg rpcRsp = { + .handle = pMsg->rpcMsg.handle, + .pCont = pRsp, + .contLen = sizeof(SShellSubmitRspMsg), + .code = 0, + .msgType = 0 + }; + rpcSendResponse(&rpcRsp); +} +static void dnodeProcessCreateTableMsg(SWriteMsg *pMsg) { + SMDCreateTableMsg *pTable = pMsg->rpcMsg.pCont; + SRpcMsg rpcRsp = {.handle = pMsg->rpcMsg.handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + + dTrace("table:%s, start to create in dnode, vgroup:%d", pTable->tableId, pTable->vgId); + pTable->numOfColumns = htons(pTable->numOfColumns); + pTable->numOfTags = htons(pTable->numOfTags); + pTable->sid = htonl(pTable->sid); + pTable->sversion = htonl(pTable->sversion); + pTable->tagDataLen = 
htonl(pTable->tagDataLen); + pTable->sqlDataLen = htonl(pTable->sqlDataLen); + pTable->uid = htobe64(pTable->uid); + pTable->superTableUid = htobe64(pTable->superTableUid); + pTable->createdTime = htobe64(pTable->createdTime); + SSchema *pSchema = (SSchema *) pTable->data; + + int totalCols = pTable->numOfColumns + pTable->numOfTags; + for (int i = 0; i < totalCols; i++) { + pSchema[i].colId = htons(pSchema[i].colId); + pSchema[i].bytes = htons(pSchema[i].bytes); + } + + STableCfg tCfg; + tsdbInitTableCfg(&tCfg, pTable->tableType, pTable->uid, pTable->sid); + + STSchema *pDestSchema = tdNewSchema(pTable->numOfColumns); + for (int i = 0; i < pTable->numOfColumns; i++) { + tdSchemaAppendCol(pDestSchema, pSchema[i].type, pSchema[i].colId, pSchema[i].bytes); + } + tsdbTableSetSchema(&tCfg, pDestSchema, false); + + if (pTable->numOfTags != 0) { + STSchema *pDestTagSchema = tdNewSchema(pTable->numOfTags); + for (int i = pTable->numOfColumns; i < totalCols; i++) { + tdSchemaAppendCol(pDestTagSchema, pSchema[i].type, pSchema[i].colId, pSchema[i].bytes); + } + tsdbTableSetTagSchema(&tCfg, pDestTagSchema, false); + + char *pTagData = pTable->data + totalCols * sizeof(SSchema); + int accumBytes = 0; + SDataRow dataRow = tdNewDataRowFromSchema(pDestTagSchema); + + for (int i = 0; i < pTable->numOfTags; i++) { + tdAppendColVal(dataRow, pTagData + accumBytes, pDestTagSchema->columns + i); + accumBytes += pSchema[i + pTable->numOfColumns].bytes; + } + tsdbTableSetTagValue(&tCfg, dataRow, false); + } + + void *pTsdb = dnodeGetVnodeTsdb(pMsg->pVnode); + + rpcRsp.code = tsdbCreateTable(pTsdb, &tCfg); + dnodeReleaseVnode(pMsg->pVnode); + + dTrace("table:%s, create table result:%s", pTable->tableId, tstrerror(rpcRsp.code)); + rpcSendResponse(&rpcRsp); } -static void dnodeProcessCreateTableMsg(void *param, SWriteMsg *pMsg) { +static void dnodeProcessDropTableMsg(SWriteMsg *pMsg) { + SMDDropTableMsg *pTable = pMsg->rpcMsg.pCont; + SRpcMsg rpcRsp = {.handle = pMsg->rpcMsg.handle, .pCont 
= NULL, .contLen = 0, .code = 0, .msgType = 0}; + + dTrace("table:%s, start to drop in dnode, vgroup:%d", pTable->tableId, pTable->vgId); + STableId tableId = { + .uid = htobe64(pTable->uid), + .tid = htonl(pTable->sid) + }; + void *pTsdb = dnodeGetVnodeTsdb(pMsg->pVnode); + + rpcRsp.code = tsdbDropTable(pTsdb, tableId); + dnodeReleaseVnode(pMsg->pVnode); + + dTrace("table:%s, drop table result:%s", pTable->tableId, tstrerror(rpcRsp.code)); + rpcSendResponse(&rpcRsp); } -static void dnodeProcessDropTableMsg(void *param, SWriteMsg *pMsg) { +static void dnodeProcessAlterTableMsg(SWriteMsg *pMsg) { + SMDCreateTableMsg *pTable = pMsg->rpcMsg.pCont; + SRpcMsg rpcRsp = {.handle = pMsg->rpcMsg.handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + + dTrace("table:%s, start to alter in dnode, vgroup:%d", pTable->tableId, pTable->vgId); + pTable->numOfColumns = htons(pTable->numOfColumns); + pTable->numOfTags = htons(pTable->numOfTags); + pTable->sid = htonl(pTable->sid); + pTable->sversion = htonl(pTable->sversion); + pTable->tagDataLen = htonl(pTable->tagDataLen); + pTable->sqlDataLen = htonl(pTable->sqlDataLen); + pTable->uid = htobe64(pTable->uid); + pTable->superTableUid = htobe64(pTable->superTableUid); + pTable->createdTime = htobe64(pTable->createdTime); + SSchema *pSchema = (SSchema *) pTable->data; + + int totalCols = pTable->numOfColumns + pTable->numOfTags; + for (int i = 0; i < totalCols; i++) { + pSchema[i].colId = htons(pSchema[i].colId); + pSchema[i].bytes = htons(pSchema[i].bytes); + } + + STableCfg tCfg; + tsdbInitTableCfg(&tCfg, pTable->tableType, pTable->uid, pTable->sid); + + STSchema *pDestSchema = tdNewSchema(pTable->numOfColumns); + for (int i = 0; i < pTable->numOfColumns; i++) { + tdSchemaAppendCol(pDestSchema, pSchema[i].type, pSchema[i].colId, pSchema[i].bytes); + } + tsdbTableSetSchema(&tCfg, pDestSchema, false); + + if (pTable->numOfTags != 0) { + STSchema *pDestTagSchema = tdNewSchema(pTable->numOfTags); + for (int i = 
pTable->numOfColumns; i < totalCols; i++) { + tdSchemaAppendCol(pDestTagSchema, pSchema[i].type, pSchema[i].colId, pSchema[i].bytes); + } + tsdbTableSetSchema(&tCfg, pDestTagSchema, false); + + char *pTagData = pTable->data + totalCols * sizeof(SSchema); + int accumBytes = 0; + SDataRow dataRow = tdNewDataRowFromSchema(pDestTagSchema); + for (int i = 0; i < pTable->numOfTags; i++) { + tdAppendColVal(dataRow, pTagData + accumBytes, pDestTagSchema->columns + i); + accumBytes += pSchema[i + pTable->numOfColumns].bytes; + } + tsdbTableSetTagValue(&tCfg, dataRow, false); + } + + void *pTsdb = dnodeGetVnodeTsdb(pMsg->pVnode); + + rpcRsp.code = tsdbAlterTable(pTsdb, &tCfg); + dnodeReleaseVnode(pMsg->pVnode); + + dTrace("table:%s, alter table result:%s", pTable->tableId, tstrerror(rpcRsp.code)); + rpcSendResponse(&rpcRsp); +} + +static void dnodeProcessDropStableMsg(SWriteMsg *pMsg) { + SMDDropSTableMsg *pTable = pMsg->rpcMsg.pCont; + SRpcMsg rpcRsp = {.handle = pMsg->rpcMsg.handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + + dTrace("stable:%s, start to it drop in dnode, vgroup:%d", pTable->tableId, pTable->vgId); + pTable->uid = htobe64(pTable->uid); + + // TODO: drop stable in vvnode + //void *pTsdb = dnodeGetVnodeTsdb(pMsg->pVnode); + //rpcRsp.code = tsdbDropSTable(pTsdb, pTable->uid); + + rpcRsp.code = TSDB_CODE_SUCCESS; + dnodeReleaseVnode(pMsg->pVnode); + + dTrace("stable:%s, drop stable result:%s", pTable->tableId, tstrerror(rpcRsp.code)); + rpcSendResponse(&rpcRsp); } + diff --git a/src/inc/dnode.h b/src/inc/dnode.h index ff893acd38a7ea8c80309d43c2499376155c2dda..b94d4cbacb44fae1acf267e54c6e2e59639b8436 100644 --- a/src/inc/dnode.h +++ b/src/inc/dnode.h @@ -21,7 +21,7 @@ extern "C" { #endif #include -#include +#include typedef struct { int32_t queryReqNum; @@ -29,33 +29,13 @@ typedef struct { int32_t httpReqNum; } SDnodeStatisInfo; -typedef struct { - char id[20]; - char sid; - void *thandle; - int mgmtIndex; - char status; // 0:offline, 1:online -} 
SMgmtObj; - -// global variables -extern uint32_t tsRebootTime; - -// dnodeCluster -extern void (*dnodeStartModules)(); -extern int32_t (*dnodeCheckSystem)(); - - -// dnodeSystem -extern void *tsDnodeMgmtQhandle; -void dnodeCheckDataDirOpenned(const char* dir); - -// dnodeModule -extern void (*dnodeStartModules)(); - - +typedef enum { + TSDB_DNODE_RUN_STATUS_INITIALIZE, + TSDB_DNODE_RUN_STATUS_RUNING, + TSDB_DNODE_RUN_STATUS_STOPPED +} SDnodeRunStatus; -void dnodeLockVnodes(); -void dnodeUnLockVnodes(); +SDnodeRunStatus dnodeGetRunStatus(); SDnodeStatisInfo dnodeGetStatisInfo(); #ifdef __cplusplus diff --git a/src/inc/mnode.h b/src/inc/mnode.h index 15f1e653c51e2d119e576ac0505d18ad7d98290e..be152fbf3cbc6000cb40affbe99ea07e3c62b05e 100644 --- a/src/inc/mnode.h +++ b/src/inc/mnode.h @@ -25,8 +25,6 @@ extern "C" { #include "taosdef.h" #include "taosmsg.h" #include "taoserror.h" - - #include "sdb.h" #include "tglobalcfg.h" #include "thash.h" @@ -41,62 +39,67 @@ extern "C" { #include "ttimer.h" #include "tutil.h" -// internal globals -extern char version[]; -extern void *tsMgmtTmr; -extern void *tsMgmtTranQhandle; -extern char tsMgmtDirectory[]; +typedef struct { + int32_t mnodeId; + uint32_t privateIp; + uint32_t publicIp; + int64_t createdTime; + int64_t lostTime; + uint64_t dbVersion; + uint32_t rack; + uint16_t idc; + uint16_t slot; + int8_t role; + int8_t status; + int8_t numOfMnodes; + int32_t numOfDnodes; + char mnodeName[TSDB_DNODE_NAME_LEN + 1]; + char reserved[7]; + char updateEnd[1]; + int syncFd; + void *hbTimer; + void *pSync; +} SMnodeObj; typedef struct { + int32_t dnodeId; uint32_t privateIp; - int32_t sid; + uint32_t publicIp; uint32_t moduleStatus; - int32_t openVnodes; - int32_t numOfVnodes; - int32_t numOfFreeVnodes; int64_t createdTime; - uint32_t publicIp; - int32_t status; uint32_t lastAccess; - uint32_t rebootTimes; - uint32_t lastReboot; // time stamp for last reboot + int32_t openVnodes; + int32_t numOfTotalVnodes; // from dnode status msg, 
config information + uint32_t rack; + uint16_t idc; + uint16_t slot; uint16_t numOfCores; // from dnode status msg - uint8_t alternativeRole; // from dnode status msg, 0-any, 1-mgmt, 2-dnode - uint8_t reserveStatus; - uint16_t numOfTotalVnodes; // from dnode status msg, config information - uint16_t unused; + int8_t alternativeRole; // from dnode status msg, 0-any, 1-mgmt, 2-dnode + int8_t lbStatus; // set in balance function + float lbScore; // calc in balance function + int32_t customScore; // config by user + char dnodeName[TSDB_DNODE_NAME_LEN + 1]; + char reserved[7]; + char updateEnd[1]; + SVnodeLoad vload[TSDB_MAX_VNODES]; + int32_t status; + uint32_t lastReboot; // time stamp for last reboot float diskAvailable; // from dnode status msg - int32_t bandwidthMb; // config by user + int16_t diskAvgUsage; // calc from sys.disk int16_t cpuAvgUsage; // calc from sys.cpu int16_t memoryAvgUsage; // calc from sys.mem - int16_t diskAvgUsage; // calc from sys.disk int16_t bandwidthUsage; // calc from sys.band - uint32_t rack; - uint16_t idc; - uint16_t slot; - int32_t customScore; // config by user - float lbScore; // calc in balance function - int16_t lbStatus; // set in balance function - int16_t lastAllocVnode; // increase while create vnode - SVnodeLoad vload[TSDB_MAX_VNODES]; - char reserved[16]; - char updateEnd[1]; - void * thandle; } SDnodeObj; typedef struct { - uint32_t ip; - uint32_t publicIp; + int32_t dnodeId; int32_t vnode; + uint32_t privateIp; + uint32_t publicIp; } SVnodeGid; typedef struct { - int32_t sid; - int32_t vgId; // vnode group ID -} STableGid; - -typedef struct { - char tableId[TSDB_TABLE_ID_LEN + 1]; + char tableId[TSDB_TABLE_ID_LEN]; int8_t type; int8_t dirty; uint64_t uid; @@ -118,7 +121,7 @@ typedef struct SSuperTableObj { int32_t sversion; int32_t numOfColumns; int32_t numOfTags; - int8_t reserved[7]; + int8_t reserved[5]; int8_t updateEnd[1]; int32_t numOfTables; int16_t nextColId; @@ -128,26 +131,28 @@ typedef struct SSuperTableObj { 
typedef struct { char tableId[TSDB_TABLE_ID_LEN + 1]; int8_t type; + int8_t dirty; uint64_t uid; int32_t sid; int32_t vgId; int64_t createdTime; char superTableId[TSDB_TABLE_ID_LEN + 1]; - int8_t reserved[7]; + int8_t reserved[1]; int8_t updateEnd[1]; SSuperTableObj *superTable; } SChildTableObj; typedef struct { - char tableId[TSDB_TABLE_ID_LEN + 1]; + char tableId[TSDB_TABLE_ID_LEN]; int8_t type; + int8_t dirty; uint64_t uid; int32_t sid; int32_t vgId; int64_t createdTime; int32_t sversion; int32_t numOfColumns; - int16_t sqlLen; + int32_t sqlLen; int8_t reserved[3]; int8_t updateEnd[1]; char* sql; //null-terminated string @@ -159,15 +164,13 @@ typedef struct _vg_obj { uint32_t vgId; char dbName[TSDB_DB_NAME_LEN + 1]; int64_t createdTime; - uint64_t lastCreate; - uint64_t lastRemove; - int32_t numOfVnodes; SVnodeGid vnodeGid[TSDB_VNODES_SUPPORT]; + int32_t numOfVnodes; int32_t numOfTables; int32_t lbIp; int32_t lbTime; int8_t lbStatus; - int8_t reserved[16]; + int8_t reserved[14]; int8_t updateEnd[1]; struct _vg_obj *prev, *next; void * idPool; @@ -176,19 +179,17 @@ typedef struct _vg_obj { typedef struct _db_obj { char name[TSDB_DB_NAME_LEN + 1]; + int8_t dirty; int64_t createdTime; SDbCfg cfg; - int8_t dropStatus; - char reserved[16]; + char reserved[15]; char updateEnd[1]; struct _db_obj *prev, *next; int32_t numOfVgroups; int32_t numOfTables; int32_t numOfSuperTables; - int32_t vgStatus; - SVgObj *pHead; // empty vgroup first - SVgObj *pTail; // empty vgroup end - void * vgTimer; + SVgObj *pHead; + SVgObj *pTail; } SDbObj; struct _acctObj; @@ -251,16 +252,32 @@ typedef struct { int16_t offset[TSDB_MAX_COLUMNS]; int16_t bytes[TSDB_MAX_COLUMNS]; void * signature; - uint16_t payloadLen; /* length of payload*/ - char payload[]; /* payload for wildcard match in show tables */ + uint16_t payloadLen; + char payload[]; } SShowObj; -//mgmtSystem +typedef struct { + uint8_t msgType; + int8_t expected; + int8_t received; + int8_t successed; + int32_t contLen; + int32_t 
code; + void *ahandle; + void *thandle; + void *pCont; + SDbObj *pDb; + SUserObj *pUser; +} SQueuedMsg; + int32_t mgmtInitSystem(); int32_t mgmtStartSystem(); void mgmtCleanUpSystem(); void mgmtStopSystem(); +extern char version[]; +extern void *tsMgmtTmr; +extern char tsMnodeDir[]; #ifdef __cplusplus } diff --git a/src/inc/taosdef.h b/src/inc/taosdef.h index 85bdfea98f4037bf684a8a64cacd574ba882dae8..5da8aa191b5e1ce5072a80599a0c64b3f44d9f15 100644 --- a/src/inc/taosdef.h +++ b/src/inc/taosdef.h @@ -82,6 +82,17 @@ extern const int32_t TYPE_BYTES[11]; #define TSDB_TIME_PRECISION_MILLI_STR "ms" #define TSDB_TIME_PRECISION_MICRO_STR "us" +#define T_MEMBER_SIZE(type, member) sizeof(((type *)0)->member) +#define T_APPEND_MEMBER(dst, ptr, type, member) \ +do {\ + memcpy((void *)(dst), (void *)(&((ptr)->member)), T_MEMBER_SIZE(type, member));\ + dst = (void *)((char *)(dst) + T_MEMBER_SIZE(type, member));\ +} while(0) +#define T_READ_MEMBER(src, type, target) \ +do { \ + (target) = *(type *)(src); \ + (src) = (void *)((char *)src + sizeof(type));\ +} while(0) #define TSDB_KEYSIZE sizeof(TSKEY) @@ -165,7 +176,8 @@ void tsDataSwap(void *pLeft, void *pRight, int32_t type, int32_t size); #define TSDB_MAX_COLUMNS 256 #define TSDB_MIN_COLUMNS 2 //PRIMARY COLUMN(timestamp) + other columns -#define TSDB_TABLE_NAME_LEN 64 +#define TSDB_DNODE_NAME_LEN 63 +#define TSDB_TABLE_NAME_LEN 192 #define TSDB_DB_NAME_LEN 32 #define TSDB_COL_NAME_LEN 64 #define TSDB_MAX_SAVED_SQL_LEN TSDB_MAX_COLUMNS * 16 @@ -297,6 +309,20 @@ void tsDataSwap(void *pLeft, void *pRight, int32_t type, int32_t size); #define TSDB_SESSIONS_PER_VNODE (300) #define TSDB_SESSIONS_PER_DNODE (TSDB_SESSIONS_PER_VNODE * TSDB_MAX_VNODES) +enum { + TSDB_PRECISION_MILLI, + TSDB_PRECISION_MICRO, + TSDB_PRECISION_NANO +}; + +typedef enum { + TSDB_SUPER_TABLE = 0, // super table + TSDB_CHILD_TABLE = 1, // table created from super table + TSDB_NORMAL_TABLE = 2, // ordinary table + TSDB_STREAM_TABLE = 3, // table created from 
stream computing + TSDB_TABLE_MAX = 4 +} TSDB_TABLE_TYPE; + #ifdef __cplusplus } #endif diff --git a/src/inc/taosmsg.h b/src/inc/taosmsg.h index a2efd917df8575fe1afa02709ff6f1271a468b89..467c2a2995ac15407364ed0413aa3979e4a43676 100644 --- a/src/inc/taosmsg.h +++ b/src/inc/taosmsg.h @@ -25,7 +25,6 @@ extern "C" { #include "taosdef.h" #include "taoserror.h" -#include "taosdef.h" #include "trpc.h" // message type @@ -38,7 +37,7 @@ extern "C" { #define TSDB_MSG_TYPE_RETRIEVE 7 #define TSDB_MSG_TYPE_RETRIEVE_RSP 8 -// message from mgmt to dnode +// message from mnode to dnode #define TSDB_MSG_TYPE_MD_CREATE_TABLE 9 #define TSDB_MSG_TYPE_MD_CREATE_TABLE_RSP 10 #define TSDB_MSG_TYPE_MD_DROP_TABLE 11 @@ -58,84 +57,76 @@ extern "C" { #define TSDB_MSG_TYPE_MD_CONFIG_DNODE 25 #define TSDB_MSG_TYPE_MD_CONFIG_DNODE_RSP 26 - +// message from client to mnode +#define TSDB_MSG_TYPE_CM_CONNECT 31 +#define TSDB_MSG_TYPE_CM_CONNECT_RSP 32 +#define TSDB_MSG_TYPE_CM_CREATE_ACCT 33 +#define TSDB_MSG_TYPE_CM_CREATE_ACCT_RSP 34 +#define TSDB_MSG_TYPE_CM_ALTER_ACCT 35 +#define TSDB_MSG_TYPE_CM_ALTER_ACCT_RSP 36 +#define TSDB_MSG_TYPE_CM_DROP_ACCT 37 +#define TSDB_MSG_TYPE_CM_DROP_ACCT_RSP 38 +#define TSDB_MSG_TYPE_CM_CREATE_USER 39 +#define TSDB_MSG_TYPE_CM_CREATE_USER_RSP 40 +#define TSDB_MSG_TYPE_CM_ALTER_USER 41 +#define TSDB_MSG_TYPE_CM_ALTER_USER_RSP 42 +#define TSDB_MSG_TYPE_CM_DROP_USER 43 +#define TSDB_MSG_TYPE_CM_DROP_USER_RSP 44 +#define TSDB_MSG_TYPE_CM_CREATE_DNODE 45 +#define TSDB_MSG_TYPE_CM_CREATE_DNODE_RSP 46 +#define TSDB_MSG_TYPE_CM_DROP_DNODE 47 +#define TSDB_MSG_TYPE_CM_DROP_DNODE_RSP 48 #define TSDB_MSG_TYPE_CM_CONFIG_DNODE TSDB_MSG_TYPE_MD_CONFIG_DNODE #define TSDB_MSG_TYPE_CM_CONFIG_DNODE_RSP TSDB_MSG_TYPE_MD_CONFIG_DNODE_RSP - -#define TSDB_MSG_TYPE_DM_CONFIG_VNODE 19 -#define TSDB_MSG_TYPE_DM_CONFIG_VNODE_RSP 20 - - -#define TSDB_MSG_TYPE_SDB_SYNC 21 -#define TSDB_MSG_TYPE_SDB_SYNC_RSP 22 -#define TSDB_MSG_TYPE_SDB_FORWARD 23 -#define TSDB_MSG_TYPE_SDB_FORWARD_RSP 
24 -#define TSDB_MSG_TYPE_CONNECT 31 -#define TSDB_MSG_TYPE_CONNECT_RSP 32 -#define TSDB_MSG_TYPE_CREATE_ACCT 33 -#define TSDB_MSG_TYPE_CREATE_ACCT_RSP 34 -#define TSDB_MSG_TYPE_ALTER_ACCT 35 -#define TSDB_MSG_TYPE_ALTER_ACCT_RSP 36 -#define TSDB_MSG_TYPE_DROP_ACCT 37 -#define TSDB_MSG_TYPE_DROP_ACCT_RSP 38 -#define TSDB_MSG_TYPE_CREATE_USER 39 -#define TSDB_MSG_TYPE_CREATE_USER_RSP 40 -#define TSDB_MSG_TYPE_ALTER_USER 41 -#define TSDB_MSG_TYPE_ALTER_USER_RSP 42 -#define TSDB_MSG_TYPE_DROP_USER 43 -#define TSDB_MSG_TYPE_DROP_USER_RSP 44 -#define TSDB_MSG_TYPE_CREATE_MNODE 45 -#define TSDB_MSG_TYPE_CREATE_MNODE_RSP 46 -#define TSDB_MSG_TYPE_DROP_MNODE 47 -#define TSDB_MSG_TYPE_DROP_MNODE_RSP 48 -#define TSDB_MSG_TYPE_CREATE_DNODE 49 -#define TSDB_MSG_TYPE_CREATE_DNODE_RSP 50 -#define TSDB_MSG_TYPE_DROP_DNODE 51 -#define TSDB_MSG_TYPE_DROP_DNODE_RSP 52 -#define TSDB_MSG_TYPE_ALTER_DNODE 53 -#define TSDB_MSG_TYPE_ALTER_DNODE_RSP 54 -#define TSDB_MSG_TYPE_CREATE_DB 55 -#define TSDB_MSG_TYPE_CREATE_DB_RSP 56 -#define TSDB_MSG_TYPE_DROP_DB 57 -#define TSDB_MSG_TYPE_DROP_DB_RSP 58 -#define TSDB_MSG_TYPE_USE_DB 59 -#define TSDB_MSG_TYPE_USE_DB_RSP 60 -#define TSDB_MSG_TYPE_ALTER_DB 61 -#define TSDB_MSG_TYPE_ALTER_DB_RSP 62 -#define TSDB_MSG_TYPE_CREATE_TABLE 63 -#define TSDB_MSG_TYPE_CREATE_TABLE_RSP 64 -#define TSDB_MSG_TYPE_DROP_TABLE 65 -#define TSDB_MSG_TYPE_DROP_TABLE_RSP 66 -#define TSDB_MSG_TYPE_ALTER_TABLE 67 -#define TSDB_MSG_TYPE_ALTER_TABLE_RSP 68 - -#define TSDB_MSG_TYPE_TABLE_CFG 71 -#define TSDB_MSG_TYPE_TABLE_CFG_RSP 72 -#define TSDB_MSG_TYPE_TABLE_META 73 -#define TSDB_MSG_TYPE_TABLE_META_RSP 74 -#define TSDB_MSG_TYPE_STABLE_META 75 -#define TSDB_MSG_TYPE_STABLE_META_RSP 76 -#define TSDB_MSG_TYPE_MULTI_TABLE_META 77 -#define TSDB_MSG_TYPE_MULTI_TABLE_META_RSP 78 -#define TSDB_MSG_TYPE_ALTER_STREAM 79 -#define TSDB_MSG_TYPE_ALTER_STREAM_RSP 80 -#define TSDB_MSG_TYPE_SHOW 81 -#define TSDB_MSG_TYPE_SHOW_RSP 82 -#define TSDB_MSG_TYPE_CFG_MNODE 83 -#define 
TSDB_MSG_TYPE_CFG_MNODE_RSP 84 -#define TSDB_MSG_TYPE_KILL_QUERY 85 -#define TSDB_MSG_TYPE_KILL_QUERY_RSP 86 -#define TSDB_MSG_TYPE_KILL_STREAM 87 -#define TSDB_MSG_TYPE_KILL_STREAM_RSP 88 -#define TSDB_MSG_TYPE_KILL_CONNECTION 89 -#define TSDB_MSG_TYPE_KILL_CONNECTION_RSP 90 -#define TSDB_MSG_TYPE_HEARTBEAT 91 -#define TSDB_MSG_TYPE_HEARTBEAT_RSP 92 -#define TSDB_MSG_TYPE_STATUS 93 -#define TSDB_MSG_TYPE_STATUS_RSP 94 -#define TSDB_MSG_TYPE_GRANT 95 -#define TSDB_MSG_TYPE_GRANT_RSP 96 -#define TSDB_MSG_TYPE_MAX 97 +#define TSDB_MSG_TYPE_CM_CREATE_DB 49 +#define TSDB_MSG_TYPE_CM_CREATE_DB_RSP 50 +#define TSDB_MSG_TYPE_CM_DROP_DB 51 +#define TSDB_MSG_TYPE_CM_DROP_DB_RSP 52 +#define TSDB_MSG_TYPE_CM_USE_DB 53 +#define TSDB_MSG_TYPE_CM_USE_DB_RSP 54 +#define TSDB_MSG_TYPE_CM_ALTER_DB 55 +#define TSDB_MSG_TYPE_CM_ALTER_DB_RSP 56 +#define TSDB_MSG_TYPE_CM_CREATE_TABLE 57 +#define TSDB_MSG_TYPE_CM_CREATE_TABLE_RSP 58 +#define TSDB_MSG_TYPE_CM_DROP_TABLE 59 +#define TSDB_MSG_TYPE_CM_DROP_TABLE_RSP 60 +#define TSDB_MSG_TYPE_CM_ALTER_TABLE 61 +#define TSDB_MSG_TYPE_CM_ALTER_TABLE_RSP 62 +#define TSDB_MSG_TYPE_CM_TABLE_META 63 +#define TSDB_MSG_TYPE_CM_TABLE_META_RSP 64 +#define TSDB_MSG_TYPE_CM_STABLE_META 65 +#define TSDB_MSG_TYPE_CM_STABLE_META_RSP 66 +#define TSDB_MSG_TYPE_CM_TABLES_META 67 +#define TSDB_MSG_TYPE_CM_TABLES_META_RSP 68 +#define TSDB_MSG_TYPE_CM_ALTER_STREAM 69 +#define TSDB_MSG_TYPE_CM_ALTER_STREAM_RSP 70 +#define TSDB_MSG_TYPE_CM_SHOW 71 +#define TSDB_MSG_TYPE_CM_SHOW_RSP 72 +#define TSDB_MSG_TYPE_CM_KILL_QUERY 73 +#define TSDB_MSG_TYPE_CM_KILL_QUERY_RSP 74 +#define TSDB_MSG_TYPE_CM_KILL_STREAM 75 +#define TSDB_MSG_TYPE_CM_KILL_STREAM_RSP 76 +#define TSDB_MSG_TYPE_CM_KILL_CONN 77 +#define TSDB_MSG_TYPE_CM_KILL_CONN_RSP 78 +#define TSDB_MSG_TYPE_CM_HEARTBEAT 79 +#define TSDB_MSG_TYPE_CM_HEARTBEAT_RSP 80 + +// message from dnode to mnode +#define TSDB_MSG_TYPE_DM_CONFIG_TABLE 91 +#define TSDB_MSG_TYPE_DM_CONFIG_TABLE_RSP 92 +#define 
TSDB_MSG_TYPE_DM_CONFIG_VNODE 93 +#define TSDB_MSG_TYPE_DM_CONFIG_VNODE_RSP 94 +#define TSDB_MSG_TYPE_DM_STATUS 95 +#define TSDB_MSG_TYPE_DM_STATUS_RSP 96 +#define TSDB_MSG_TYPE_DM_GRANT 97 +#define TSDB_MSG_TYPE_DM_GRANT_RSP 98 + +#define TSDB_MSG_TYPE_SDB_SYNC 101 +#define TSDB_MSG_TYPE_SDB_SYNC_RSP 102 +#define TSDB_MSG_TYPE_SDB_FORWARD 103 +#define TSDB_MSG_TYPE_SDB_FORWARD_RSP 104 + +#define TSDB_MSG_TYPE_MAX 105 // IE type #define TSDB_IE_TYPE_SEC 1 @@ -185,22 +176,13 @@ enum _mgmt_table { #define TSDB_KILL_MSG_LEN 30 -typedef enum { - TSDB_TABLE_TYPE_SUPER_TABLE = 0, // super table - TSDB_TABLE_TYPE_CHILD_TABLE = 1, // table created from super table - TSDB_TABLE_TYPE_NORMAL_TABLE = 2, // ordinary table - TSDB_TABLE_TYPE_STREAM_TABLE = 3, // table created from stream computing - TSDB_TABLE_TYPE_MAX = 4 -} ETableType; - - #define TSDB_VN_READ_ACCCESS ((char)0x1) #define TSDB_VN_WRITE_ACCCESS ((char)0x2) #define TSDB_VN_ALL_ACCCESS (TSDB_VN_READ_ACCCESS | TSDB_VN_WRITE_ACCCESS) -#define TSDB_COL_NORMAL 0x0U -#define TSDB_COL_TAG 0x1U -#define TSDB_COL_JOIN 0x2U +#define TSDB_COL_NORMAL 0x0u +#define TSDB_COL_TAG 0x1u +#define TSDB_COL_JOIN 0x2u extern char *taosMsg[]; @@ -216,10 +198,20 @@ typedef struct { } SShellSubmitBlock; typedef struct { + int32_t numOfVnodes; +} SMsgDesc; + +typedef struct SMsgHead { + int32_t contLen; + int32_t vgId; +} SMsgHead; + +typedef struct { + SMsgDesc desc; + SMsgHead header; int16_t import; - int16_t vnode; - int32_t numOfSid; /* total number of sid */ - char blks[]; /* numOfSid blocks, each blocks for one table */ + int32_t numOfTables; // total number of sid + char blks[]; // number of data blocks, each table has at least one data block } SShellSubmitMsg; typedef struct { @@ -240,7 +232,7 @@ typedef struct { typedef struct SSchema { uint8_t type; - char name[TSDB_COL_NAME_LEN + 1]; + char name[TSDB_COL_NAME_LEN]; int16_t colId; int16_t bytes; } SSchema; @@ -248,9 +240,11 @@ typedef struct SSchema { typedef struct { int32_t 
vnode; //the index of vnode uint32_t ip; -} SVPeerDesc; +} SVnodeDesc; typedef struct { + int32_t contLen; + int32_t vgId; int8_t tableType; int16_t numOfColumns; int16_t numOfTags; @@ -258,47 +252,45 @@ typedef struct { int32_t sversion; int32_t tagDataLen; int32_t sqlDataLen; - int32_t contLen; - int32_t numOfVPeers; uint64_t uid; uint64_t superTableUid; uint64_t createdTime; - SVPeerDesc vpeerDesc[TSDB_MAX_MPEERS]; - char tableId[TSDB_TABLE_ID_LEN + 1]; - char superTableId[TSDB_TABLE_ID_LEN + 1]; + char tableId[TSDB_TABLE_ID_LEN]; + char superTableId[TSDB_TABLE_ID_LEN]; char data[]; -} SDMCreateTableMsg; +} SMDCreateTableMsg; typedef struct { - char tableId[TSDB_TABLE_ID_LEN + 1]; - char db[TSDB_DB_NAME_LEN + 1]; + char tableId[TSDB_TABLE_ID_LEN]; + char db[TSDB_DB_NAME_LEN]; int8_t igExists; int16_t numOfTags; int16_t numOfColumns; int16_t sqlLen; // the length of SQL, it starts after schema , sql is a null-terminated string - int16_t reserved[16]; + int32_t contLen; + int8_t reserved[16]; char schema[]; -} SCreateTableMsg; +} SCMCreateTableMsg; typedef struct { - char tableId[TSDB_TABLE_ID_LEN + 1]; + char tableId[TSDB_TABLE_ID_LEN]; int8_t igNotExists; -} SDropTableMsg; +} SCMDropTableMsg; typedef struct { - char tableId[TSDB_TABLE_ID_LEN + 1]; - char db[TSDB_DB_NAME_LEN + 1]; + char tableId[TSDB_TABLE_ID_LEN]; + char db[TSDB_DB_NAME_LEN]; int16_t type; /* operation type */ char tagVal[TSDB_MAX_BYTES_PER_ROW]; int8_t numOfCols; /* number of schema */ SSchema schema[]; -} SAlterTableMsg; +} SCMAlterTableMsg; typedef struct { char clientVersion[TSDB_VERSION_LEN]; char msgVersion[TSDB_VERSION_LEN]; char db[TSDB_TABLE_ID_LEN + 1]; -} SConnectMsg; +} SCMConnectMsg; typedef struct { char acctId[TSDB_ACCT_LEN + 1]; @@ -306,7 +298,7 @@ typedef struct { int8_t writeAuth; int8_t superAuth; SRpcIpSet ipList; -} SConnectRsp; +} SCMConnectRsp; typedef struct { int32_t maxUsers; @@ -326,39 +318,40 @@ typedef struct { char user[TSDB_USER_LEN + 1]; char pass[TSDB_KEY_LEN + 
1]; SAcctCfg cfg; -} SCreateAcctMsg, SAlterAcctMsg; +} SCMCreateAcctMsg, SCMAlterAcctMsg; typedef struct { char user[TSDB_USER_LEN + 1]; -} SDropUserMsg, SDropAcctMsg; +} SCMDropUserMsg, SCMDropAcctMsg; typedef struct { char user[TSDB_USER_LEN + 1]; char pass[TSDB_KEY_LEN + 1]; int8_t privilege; int8_t flag; -} SCreateUserMsg, SAlterUserMsg; +} SCMCreateUserMsg, SCMAlterUserMsg; typedef struct { char db[TSDB_TABLE_ID_LEN + 1]; } SMgmtHead; typedef struct { + int32_t contLen; + int32_t vgId; int32_t sid; - int32_t numOfVPeers; uint64_t uid; - SVPeerDesc vpeerDesc[TSDB_MAX_MPEERS]; char tableId[TSDB_TABLE_ID_LEN + 1]; } SMDDropTableMsg; typedef struct { - char tableId[TSDB_TABLE_ID_LEN + 1]; + int32_t contLen; + int32_t vgId; int64_t uid; -} SDRemoveSuperTableMsg; + char tableId[TSDB_TABLE_ID_LEN + 1]; +} SMDDropSTableMsg; typedef struct { - int32_t vgId; - int32_t vnode; + int32_t vgId; } SMDDropVnodeMsg; typedef struct SColIndexEx { @@ -403,7 +396,7 @@ typedef struct SSqlBinaryExprInfo { typedef struct SSqlFunctionExpr { SSqlFuncExprMsg pBase; - SSqlBinaryExprInfo pBinExprInfo; + SSqlBinaryExprInfo binExprInfo; int16_t resBytes; int16_t resType; int16_t interResBytes; @@ -442,15 +435,16 @@ typedef struct SColumnInfo { SColumnFilterInfo *filters; } SColumnInfo; -/* - * enable vnode to understand how to group several tables with different tag; - */ -typedef struct STableSidExtInfo { +typedef struct STableIdInfo { int32_t sid; int64_t uid; - TSKEY key; // key for subscription - char tags[]; -} STableSidExtInfo; + TSKEY key; // last accessed ts, for subscription +} STableIdInfo; + +typedef struct STimeWindow { + TSKEY skey; + TSKEY ekey; +} STimeWindow; /* * the outputCols is equalled to or larger than numOfCols @@ -458,47 +452,46 @@ typedef struct STableSidExtInfo { * the outputCols will be 3 while the numOfCols is 1. 
*/ typedef struct { - int16_t vnode; - int32_t numOfSids; - uint64_t pSidExtInfo; // table id & tag info ptr, in windows pointer may - + int32_t contLen; // msg header + int16_t vgId; + + int32_t numOfTables; uint64_t uid; - TSKEY skey; - TSKEY ekey; + STimeWindow window; int16_t order; int16_t orderColId; int16_t numOfCols; // the number of columns will be load from vnode - char intervalTimeUnit; // time interval type, for revisement of interval(1d) + char slidingTimeUnit; // time interval type, for revisement of interval(1d) - int64_t intervalTime; // time interval for aggregation, in million second + int64_t intervalTime; // time interval for aggregation, in million second int64_t slidingTime; // value for sliding window // tag schema, used to parse tag information in pSidExtInfo uint64_t pTagSchema; - int16_t numOfTagsCols; // required number of tags - int16_t tagLength; // tag length in current query + int16_t numOfTagsCols; // required number of tags + int16_t tagLength; // tag length in current query int16_t numOfGroupCols; // num of group by columns int16_t orderByIdx; int16_t orderType; // used in group by xx order by xxx uint64_t groupbyTagIds; - int64_t limit; - int64_t offset; + int64_t limit; + int64_t offset; - int16_t queryType; // denote another query process - int16_t numOfOutputCols; // final output columns numbers + int16_t queryType; // denote another query process + int16_t numOfOutputCols; // final output columns numbers int16_t interpoType; // interpolate type uint64_t defaultVal; // default value array list - int32_t colNameLen; - int64_t colNameList; + int32_t colNameLen; + int64_t colNameList; - int64_t pSqlFuncExprs; + int64_t pSqlFuncExprs; int32_t tsOffset; // offset value in current msg body, NOTE: ts list is compressed int32_t tsLen; // total length of ts comp block @@ -508,7 +501,7 @@ typedef struct { } SQueryTableMsg; typedef struct { - char code; + int32_t code; uint64_t qhandle; } SQueryTableRsp; @@ -526,21 +519,21 @@ typedef 
struct { } SRetrieveTableRsp; typedef struct { - uint32_t vnode; - uint32_t vgId; - uint8_t status; - uint8_t dropStatus; - uint8_t accessState; - int64_t totalStorage; - int64_t compStorage; - int64_t pointsWritten; - uint8_t syncStatus; - uint8_t reserved[15]; + int32_t vgId; + int32_t vnode; + int64_t totalStorage; + int64_t compStorage; + int64_t pointsWritten; + uint8_t status; + uint8_t syncStatus; + uint8_t accessState; + uint8_t reserved[5]; } SVnodeLoad; typedef struct { uint32_t vnode; - char accessState; + uint8_t accessState; + uint8_t reserved[3]; } SVnodeAccess; /* @@ -570,12 +563,12 @@ typedef struct { int8_t loadLatest; // load into mem or not uint8_t precision; // time resolution int8_t reserved[16]; -} SVnodeCfg, SCreateDbMsg, SDbCfg, SAlterDbMsg; +} SVnodeCfg, SDbCfg, SCMCreateDbMsg, SCMAlterDbMsg; typedef struct { char db[TSDB_TABLE_ID_LEN + 1]; uint8_t ignoreNotExists; -} SDropDbMsg, SUseDbMsg; +} SCMDropDbMsg, SCMUseDbMsg; // IMPORTANT: sizeof(SVnodeStatisticInfo) should not exceed // TSDB_FILE_HEADER_LEN/4 - TSDB_FILE_HEADER_VERSION_SIZE @@ -588,14 +581,16 @@ typedef struct { } SVnodeStatisticInfo; typedef struct { + int32_t dnodeId; uint32_t moduleStatus; uint32_t createdTime; uint32_t numOfVnodes; - uint32_t reserved; } SDnodeState; typedef struct { uint32_t version; + int32_t dnodeId; + char dnodeName[TSDB_DNODE_NAME_LEN]; uint32_t privateIp; uint32_t publicIp; uint32_t lastReboot; // time stamp for last reboot @@ -606,52 +601,38 @@ typedef struct { uint8_t alternativeRole; uint8_t reserve[15]; SVnodeLoad load[]; -} SStatusMsg; +} SDMStatusMsg; typedef struct { - int32_t code; - SDnodeState dnodeState; SRpcIpSet ipList; + SDnodeState dnodeState; SVnodeAccess vnodeAccess[]; -} SStatusRsp; +} SDMStatusRsp; typedef struct { - char spi; - char encrypt; - char secret[TSDB_KEY_LEN]; // key is changed if updated - char cipheringKey[TSDB_KEY_LEN]; -} SSecIe; - -typedef struct { - int32_t numOfVPeers; - SVPeerDesc vpeerDesc[]; -} SVpeerDescArray; - 
-typedef struct { - int32_t vnode; SVnodeCfg cfg; - SVPeerDesc vpeerDesc[TSDB_MAX_MPEERS]; + SVnodeDesc vpeerDesc[TSDB_MAX_MPEERS]; } SMDCreateVnodeMsg; typedef struct { char tableId[TSDB_TABLE_ID_LEN + 1]; int16_t createFlag; char tags[]; -} STableInfoMsg; +} SCMTableInfoMsg; typedef struct { int32_t numOfTables; char tableIds[]; -} SMultiTableInfoMsg; +} SCMMultiTableInfoMsg; typedef struct { char tableId[TSDB_TABLE_ID_LEN + 1]; -} SSuperTableInfoMsg; +} SCMSuperTableInfoMsg; typedef struct { int32_t numOfDnodes; uint32_t dnodeIps[]; -} SSuperTableInfoRsp; +} SCMSuperTableInfoRsp; typedef struct { int16_t elemLen; @@ -683,10 +664,10 @@ typedef struct { } SSuperTableMetaMsg; typedef struct { - SVPeerDesc vpeerDesc[TSDB_VNODES_SUPPORT]; + SVnodeDesc vpeerDesc[TSDB_VNODES_SUPPORT]; int16_t index; // used locally int32_t numOfSids; - int32_t pSidExtInfoList[]; // offset value of STableSidExtInfo + int32_t pSidExtInfoList[]; // offset value of STableIdInfo } SVnodeSidList; typedef struct { @@ -696,28 +677,27 @@ typedef struct { int32_t list[]; /* offset of SVnodeSidList, compared to the SSuperTableMeta struct */ } SSuperTableMeta; -typedef struct STableMeta { - char tableId[TSDB_TABLE_ID_LEN + 1]; // note: This field must be at the front +typedef struct STableMetaMsg { + char tableId[TSDB_TABLE_ID_LEN]; // note: This field must be at the front int32_t contLen; - uint8_t numOfTags : 6; - uint8_t precision : 2; - uint8_t tableType : 4; - uint8_t index : 4; // used locally + uint8_t numOfTags; + uint8_t precision; + uint8_t tableType; int16_t numOfColumns; - int16_t rowSize; // used locally, calculated in client int16_t sversion; + int8_t numOfVpeers; - SVPeerDesc vpeerDesc[TSDB_VNODES_SUPPORT]; + SVnodeDesc vpeerDesc[TSDB_VNODES_SUPPORT]; int32_t sid; - int32_t vgid; + int32_t vgId; uint64_t uid; SSchema schema[]; -} STableMeta; +} STableMetaMsg; typedef struct SMultiTableMeta { int32_t numOfTables; int32_t contLen; - STableMeta metas[]; + STableMetaMsg metas[]; } 
SMultiTableMeta; typedef struct { @@ -735,27 +715,27 @@ typedef struct { char db[TSDB_DB_NAME_LEN + 1]; uint16_t payloadLen; char payload[]; -} SShowMsg; +} SCMShowMsg; -typedef struct { - uint64_t qhandle; - STableMeta tableMeta; -} SShowRsp; +typedef struct SCMShowRsp { + uint64_t qhandle; + STableMetaMsg tableMeta; +} SCMShowRsp; typedef struct { char ip[32]; -} SCreateMnodeMsg, SDropMnodeMsg, SCreateDnodeMsg, SDropDnodeMsg; +} SCMCreateDnodeMsg, SCMDropDnodeMsg; typedef struct { uint32_t dnode; int32_t vnode; int32_t sid; -} STableCfgMsg; +} SDMConfigTableMsg; typedef struct { uint32_t dnode; int32_t vnode; -} SVpeerCfgMsg; +} SDMConfigVnodeMsg; typedef struct { char ip[32]; @@ -793,18 +773,18 @@ typedef struct { typedef struct { SQqueryList qlist; SStreamList slist; -} SHeartBeatMsg; +} SCMHeartBeatMsg; typedef struct { uint32_t queryId; uint32_t streamId; int8_t killConnection; SRpcIpSet ipList; -} SHeartBeatRsp; +} SCMHeartBeatRsp; typedef struct { char queryId[TSDB_KILL_MSG_LEN + 1]; -} SKillQueryMsg, SKillStreamMsg, SKillConnectionMsg; +} SCMKillQueryMsg, SCMKillStreamMsg, SCMKillConnMsg; typedef struct { int32_t vnode; @@ -813,7 +793,7 @@ typedef struct { uint64_t stime; // stream starting time int32_t status; char tableId[TSDB_TABLE_ID_LEN + 1]; -} SDAlterStreamMsg; +} SMDAlterStreamMsg; #pragma pack(pop) diff --git a/src/inc/trpc.h b/src/inc/trpc.h index c4374a5c98cde4c29e6115264710fee58a9ea8d3..e545abfed378f661c6ab58278590784c985b2672 100644 --- a/src/inc/trpc.h +++ b/src/inc/trpc.h @@ -84,7 +84,7 @@ void *rpcReallocCont(void *ptr, int contLen); void rpcSendRequest(void *thandle, SRpcIpSet *pIpSet, SRpcMsg *pMsg); void rpcSendResponse(SRpcMsg *pMsg); void rpcSendRedirectRsp(void *pConn, SRpcIpSet *pIpSet); -void rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo); +int rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo); #ifdef __cplusplus } diff --git a/src/kit/shell/src/shellDarwin.c b/src/kit/shell/src/shellDarwin.c index 
b624f5ee68535026580af25aa962a8f6a79f963e..13513426cdea898a550ae6ed141246e290e9e555 100644 --- a/src/kit/shell/src/shellDarwin.c +++ b/src/kit/shell/src/shellDarwin.c @@ -81,7 +81,7 @@ void shellParseArgument(int argc, char *argv[], struct arguments *arguments) { // for management port else if (strcmp(argv[i], "-P") == 0) { if (i < argc - 1) { - tsMgmtShellPort = atoi(argv[++i]); + tsMnodeShellPort = atoi(argv[++i]); } else { fprintf(stderr, "option -P requires an argument\n"); exit(EXIT_FAILURE); diff --git a/src/kit/shell/src/shellEngine.c b/src/kit/shell/src/shellEngine.c index 61fdb424e54435ce7dfa04b71219093d97e89bc7..0325e7f641ecea998927f909c02df724641890d3 100644 --- a/src/kit/shell/src/shellEngine.c +++ b/src/kit/shell/src/shellEngine.c @@ -68,7 +68,7 @@ TAOS *shellInit(struct arguments *args) { tsMeterMetaKeepTimer = 3000; // Connect to the database. - TAOS *con = taos_connect(args->host, args->user, args->password, args->database, tsMgmtShellPort); + TAOS *con = taos_connect(args->host, args->user, args->password, args->database, tsMnodeShellPort); if (con == NULL) { return con; } diff --git a/src/kit/shell/src/shellImport.c b/src/kit/shell/src/shellImport.c index dd04f935e7a30f6a8775b831c3ec726855f520f4..143a27a3710ae5cda3dee1bc987649612102f016 100644 --- a/src/kit/shell/src/shellImport.c +++ b/src/kit/shell/src/shellImport.c @@ -227,7 +227,7 @@ static void shellRunImportThreads(struct arguments* args) ShellThreadObj *pThread = threadObj + t; pThread->threadIndex = t; pThread->totalThreads = args->threadNum; - pThread->taos = taos_connect(args->host, args->user, args->password, args->database, tsMgmtShellPort); + pThread->taos = taos_connect(args->host, args->user, args->password, args->database, tsMnodeShellPort); if (pThread->taos == NULL) { fprintf(stderr, "ERROR: thread:%d failed connect to TDengine, error:%s\n", pThread->threadIndex, taos_errstr(pThread->taos)); exit(0); diff --git a/src/kit/shell/src/shellLinux.c b/src/kit/shell/src/shellLinux.c 
index 081b9eae319f3570ab11b67e075292648dd76161..cdf59a7293d8915153b6f148c86a52321b5384d9 100644 --- a/src/kit/shell/src/shellLinux.c +++ b/src/kit/shell/src/shellLinux.c @@ -62,7 +62,7 @@ static error_t parse_opt(int key, char *arg, struct argp_state *state) { if (arg) arguments->password = arg; break; case 'P': - tsMgmtShellPort = atoi(arg); + tsMnodeShellPort = atoi(arg); break; case 't': arguments->timezone = arg; diff --git a/src/kit/shell/src/shellMain.c b/src/kit/shell/src/shellMain.c index 6b184b53b62b11e13dff737f50d2d2bd12aabce9..a7b7e8383bafab2f76682488d131c0d2bfbe65d3 100644 --- a/src/kit/shell/src/shellMain.c +++ b/src/kit/shell/src/shellMain.c @@ -81,17 +81,6 @@ struct arguments args = { */ int main(int argc, char* argv[]) { /*setlocale(LC_ALL, "en_US.UTF-8"); */ - // - if (argc == 1) - { - printf("=== this a test for debug usage\n"); - void *taos = taos_connect(NULL, "root", "taosdata", NULL, 0); - taos_query(taos, "select * from d1.t6"); - while (1) { - sleep(1000); - } - } - // if (!checkVersion()) { exit(EXIT_FAILURE); diff --git a/src/kit/shell/src/shellWindows.c b/src/kit/shell/src/shellWindows.c index 61e6bcaf30b887a445134b28b0af022127b3e8ab..8863f4fa46c9b3baa6d9d390c6985ced1ce00758 100644 --- a/src/kit/shell/src/shellWindows.c +++ b/src/kit/shell/src/shellWindows.c @@ -61,7 +61,7 @@ void shellParseArgument(int argc, char *argv[], struct arguments *arguments) { // for management port else if (strcmp(argv[i], "-P") == 0) { if (i < argc - 1) { - tsMgmtShellPort = atoi(argv[++i]); + tsMnodeShellPort = atoi(argv[++i]); } else { fprintf(stderr, "option -P requires an argument\n"); exit(EXIT_FAILURE); diff --git a/src/mnode/CMakeLists.txt b/src/mnode/CMakeLists.txt index 16dbbcb740834e34fb2d8545a7926a054c1a122a..5bf4cfd604c81f14a95129bcd6887d2ca2904b4f 100644 --- a/src/mnode/CMakeLists.txt +++ b/src/mnode/CMakeLists.txt @@ -4,6 +4,7 @@ PROJECT(TDengine) IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM)) INCLUDE_DIRECTORIES(${TD_OS_DIR}/inc) 
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/inc) + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/common/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/util/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/query/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/dnode/inc) diff --git a/src/mnode/inc/mgmtBalance.h b/src/mnode/inc/mgmtBalance.h index ad55e0645c41f8f166637aca86fd0df14e174a4d..401074d1713b6ebe1ce95c3c882eaaf1499a85fe 100644 --- a/src/mnode/inc/mgmtBalance.h +++ b/src/mnode/inc/mgmtBalance.h @@ -23,6 +23,7 @@ extern "C" { int32_t mgmtInitBalance(); void mgmtCleanupBalance(); +void mgmtStartBalanceTimer(int32_t afterMs) ; int32_t mgmtAllocVnodes(SVgObj *pVgroup); #ifdef __cplusplus diff --git a/src/mnode/inc/mgmtChildTable.h b/src/mnode/inc/mgmtChildTable.h index 988e54936e78d160087896fc5938edf884325979..b16dd58f67402e84c2c611282fded15abea88e97 100644 --- a/src/mnode/inc/mgmtChildTable.h +++ b/src/mnode/inc/mgmtChildTable.h @@ -30,12 +30,15 @@ int32_t mgmtInitChildTables(); void mgmtCleanUpChildTables(); void * mgmtGetChildTable(char *tableId); -int32_t mgmtCreateChildTable(SCreateTableMsg *pCreate, int32_t contLen, SVgObj *pVgroup, int32_t sid, - SDMCreateTableMsg **pDCreateOut, STableInfo **pTableOut); -int32_t mgmtDropChildTable(SDbObj *pDb, SChildTableObj *pTable); +void *mgmtCreateChildTable(SCMCreateTableMsg *pCreate, SVgObj *pVgroup, int32_t sid); +void *mgmtBuildCreateChildTableMsg(SCMCreateTableMsg *pCreate, SChildTableObj *pTable); + +int32_t mgmtDropChildTable(SQueuedMsg *newMsg, SChildTableObj *pTable); int32_t mgmtModifyChildTableTagValueByName(SChildTableObj *pTable, char *tagName, char *nContent); -int32_t mgmtGetChildTableMeta(SDbObj *pDb, SChildTableObj *pTable, STableMeta *pMeta, bool usePublicIp); +int32_t mgmtGetChildTableMeta(SDbObj *pDb, SChildTableObj *pTable, STableMetaMsg *pMeta, bool usePublicIp); + +void mgmtDropAllChildTables(SDbObj *pDropDb); #ifdef __cplusplus } diff --git a/src/mnode/inc/mgmtDServer.h 
b/src/mnode/inc/mgmtDServer.h index 5f2679f266587066beb12c70d698b7a4625c14f2..937ae8f1acd46c48a4811195e7aa5916b0a1db73 100644 --- a/src/mnode/inc/mgmtDServer.h +++ b/src/mnode/inc/mgmtDServer.h @@ -24,21 +24,6 @@ int32_t mgmtInitDServer(); void mgmtCleanupDServer(); void mgmtAddDServerMsgHandle(uint8_t msgType, void (*fp)(SRpcMsg *rpcMsg)); - -//extern void *mgmtStatusTimer; -// -//void mgmtSendCreateTableMsg(SDMCreateTableMsg *pCreate, SRpcIpSet *ipSet, void *ahandle); -//void mgmtSendDropTableMsg(SMDDropTableMsg *pRemove, SRpcIpSet *ipSet, void *ahandle); -//void mgmtSendAlterStreamMsg(STableInfo *pTable, SRpcIpSet *ipSet, void *ahandle); -//void mgmtSendDropVnodeMsg(int32_t vnode, SRpcIpSet *ipSet, void *ahandle); -// -//int32_t mgmtInitDnodeInt(); -//void mgmtCleanUpDnodeInt(); -// -//void mgmtSendMsgToDnode(SRpcIpSet *ipSet, int8_t msgType, void *pCont, int32_t contLen, void *ahandle); -//void mgmtSendRspToDnode(void *pConn, int8_t msgType, int32_t code, void *pCont, int32_t contLen); -//void mgmtProcessMsgFromDnode(char msgType, void *pCont, int32_t contLen, void *pConn, int32_t code); - #ifdef __cplusplus } #endif diff --git a/src/mnode/inc/mgmtDnode.h b/src/mnode/inc/mgmtDnode.h index 2fd6cb3d8d395f527d77acb786b6807eb955f085..4dc82fefe381adee2a3f16e16d17523edd7aade1 100644 --- a/src/mnode/inc/mgmtDnode.h +++ b/src/mnode/inc/mgmtDnode.h @@ -25,7 +25,8 @@ int32_t mgmtInitDnodes(); void mgmtCleanUpDnodes(); int32_t mgmtGetDnodesNum(); int32_t mgmtUpdateDnode(SDnodeObj *pDnode); -SDnodeObj* mgmtGetDnode(uint32_t ip); +SDnodeObj* mgmtGetDnode(int32_t dnodeId); +SDnodeObj* mgmtGetDnodeByIp(uint32_t ip); bool mgmtCheckDnodeInRemoveState(SDnodeObj *pDnode); bool mgmtCheckDnodeInOfflineState(SDnodeObj *pDnode); diff --git a/src/mnode/inc/mgmtMnode.h b/src/mnode/inc/mgmtMnode.h index 27256d805aaef85e0d630249a8c6e17c714bc412..d768d2dd7cd7ca517a02d5dcce88545b178e2851 100644 --- a/src/mnode/inc/mgmtMnode.h +++ b/src/mnode/inc/mgmtMnode.h @@ -22,6 +22,11 @@ extern "C" { 
bool mgmtCheckRedirect(void *handle); +void mgmtGetMnodeIpList(SRpcIpSet *ipSet); + +int32_t mgmtAddMnode(uint32_t privateIp, uint32_t publicIp); +int32_t mgmtRemoveMnode(uint32_t privateIp); + #ifdef __cplusplus } #endif diff --git a/src/mnode/inc/mgmtNormalTable.h b/src/mnode/inc/mgmtNormalTable.h index efe83d77edc14e12bcd1e24580d69dd8e03f1e7b..dd09a62bb4bc5f2051d374725e3759ddf5c2dae5 100644 --- a/src/mnode/inc/mgmtNormalTable.h +++ b/src/mnode/inc/mgmtNormalTable.h @@ -28,13 +28,16 @@ int32_t mgmtInitNormalTables(); void mgmtCleanUpNormalTables(); void * mgmtGetNormalTable(char *tableId); -int32_t mgmtCreateNormalTable(SCreateTableMsg *pCreate, int32_t contLen, SVgObj *pVgroup, int32_t sid, - SDMCreateTableMsg **pDCreateOut, STableInfo **pTableOut); -int32_t mgmtDropNormalTable(SDbObj *pDb, SNormalTableObj *pTable); +void * mgmtCreateNormalTable(SCMCreateTableMsg *pCreate, SVgObj *pVgroup, int32_t sid); +void * mgmtBuildCreateNormalTableMsg(SNormalTableObj *pTable); + +int32_t mgmtDropNormalTable(SQueuedMsg *newMsg, SNormalTableObj *pTable); int32_t mgmtAddNormalTableColumn(SNormalTableObj *pTable, SSchema schema[], int32_t ncols); int32_t mgmtDropNormalTableColumnByName(SNormalTableObj *pTable, char *colName); -int32_t mgmtGetNormalTableMeta(SDbObj *pDb, SNormalTableObj *pTable, STableMeta *pMeta, bool usePublicIp); +int32_t mgmtGetNormalTableMeta(SDbObj *pDb, SNormalTableObj *pTable, STableMetaMsg *pMeta, bool usePublicIp); + +void mgmtDropAllNormalTables(SDbObj *pDropDb); #ifdef __cplusplus } diff --git a/src/mnode/inc/mgmtProfile.h b/src/mnode/inc/mgmtProfile.h index 31904666586cd5a1f7c349f07b75d0e90fc48762..7f9fd9622c7b782e1d796fc36f6340198702934b 100644 --- a/src/mnode/inc/mgmtProfile.h +++ b/src/mnode/inc/mgmtProfile.h @@ -28,22 +28,6 @@ bool mgmtCheckQhandle(uint64_t qhandle); void mgmtSaveQhandle(void *qhandle); void mgmtFreeQhandle(void *qhandle); -enum { - TSDB_PROCESS_CREATE_VGROUP, - TSDB_PROCESS_CREATE_VGROUP_GET_META, - TSDB_PROCESS_CREATE_TABLE, 
- TSDB_PROCESS_CREATE_TABLE_GET_META, -}; - -typedef struct { - void *thandle; // come from uplayer - void *ahandle; // object to process - void *cont; // additional information of object to process - int32_t type; // the type of sync process - int32_t received; // num of received, such as numOfVnodes - int32_t contLen; // the length of additional information -} SProcessInfo; - #ifdef __cplusplus } #endif diff --git a/src/mnode/inc/mgmtShell.h b/src/mnode/inc/mgmtShell.h index ff089dad7ebb110388002d93eaf9252562a4c1cd..56625b982aed2fa90b39d35be7014ba7ed85e6a4 100644 --- a/src/mnode/inc/mgmtShell.h +++ b/src/mnode/inc/mgmtShell.h @@ -23,13 +23,16 @@ extern "C" { int32_t mgmtInitShell(); void mgmtCleanUpShell(); -void mgmtAddShellMsgHandle(uint8_t msgType, void (*fp)(SRpcMsg *rpcMsg)); +void mgmtAddShellMsgHandle(uint8_t msgType, void (*fp)(SQueuedMsg *queuedMsg)); -typedef int32_t (*SShowMetaFp)(STableMeta *pMeta, SShowObj *pShow, void *pConn); +typedef int32_t (*SShowMetaFp)(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); typedef int32_t (*SShowRetrieveFp)(SShowObj *pShow, char *data, int32_t rows, void *pConn); void mgmtAddShellShowMetaHandle(uint8_t showType, SShowMetaFp fp); void mgmtAddShellShowRetrieveHandle(uint8_t showType, SShowRetrieveFp fp); +void mgmtAddToShellQueue(SQueuedMsg *queuedMsg); +void mgmtSendSimpleResp(void *thandle, int32_t code); + #ifdef __cplusplus } #endif diff --git a/src/mnode/inc/mgmtSuperTable.h b/src/mnode/inc/mgmtSuperTable.h index 609e8d079f90e82be6aa6b6faf1bf7f685d69eae..922aafed7f9344ad908893cb6c7e6278a541bf35 100644 --- a/src/mnode/inc/mgmtSuperTable.h +++ b/src/mnode/inc/mgmtSuperTable.h @@ -31,20 +31,22 @@ void mgmtCleanUpSuperTables(); void * mgmtGetSuperTable(char *tableId); -int32_t mgmtCreateSuperTable(SDbObj *pDb, SCreateTableMsg *pCreate); -int32_t mgmtDropSuperTable(SDbObj *pDb, SSuperTableObj *pTable); +int32_t mgmtCreateSuperTable(SCMCreateTableMsg *pCreate); +int32_t mgmtDropSuperTable(SQueuedMsg *newMsg, SDbObj 
*pDb, SSuperTableObj *pTable); int32_t mgmtAddSuperTableTag(SSuperTableObj *pTable, SSchema schema[], int32_t ntags); int32_t mgmtDropSuperTableTag(SSuperTableObj *pTable, char *tagName); int32_t mgmtModifySuperTableTagNameByName(SSuperTableObj *pTable, char *oldTagName, char *newTagName); int32_t mgmtAddSuperTableColumn(SSuperTableObj *pTable, SSchema schema[], int32_t ncols); int32_t mgmtDropSuperTableColumnByName(SSuperTableObj *pTable, char *colName); -int32_t mgmtGetSuperTableMeta(SDbObj *pDb, SSuperTableObj *pTable, STableMeta *pMeta, bool usePublicIp); +int32_t mgmtGetSuperTableMeta(SDbObj *pDb, SSuperTableObj *pTable, STableMetaMsg *pMeta, bool usePublicIp); void * mgmtGetSuperTableVgroup(SSuperTableObj *pStable); int32_t mgmtFindSuperTableTagIndex(SSuperTableObj *pTable, const char *tagName); int32_t mgmtSetSchemaFromSuperTable(SSchema *pSchema, SSuperTableObj *pTable); +void mgmtDropAllSuperTables(SDbObj *pDropDb); + #ifdef __cplusplus } #endif diff --git a/src/mnode/inc/mgmtSystem.h b/src/mnode/inc/mgmtSystem.h deleted file mode 100644 index 5d71809f36de9ab44797c907c89fad44de0d47f6..0000000000000000000000000000000000000000 --- a/src/mnode/inc/mgmtSystem.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -#ifndef TDENGINE_MGMT_SYSTEM_H -#define TDENGINE_MGMT_SYSTEM_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -int32_t mgmtInitSystem(); -int32_t mgmtStartSystem(); -void mgmtCleanUpSystem(); -void mgmtStopSystem(); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/mnode/inc/mgmtTable.h b/src/mnode/inc/mgmtTable.h index 76b956f538c4e2e6e43b496e0cf7f62fa5cbde1c..b145210e9bbf1800ad6e80b46f635407102fb86c 100644 --- a/src/mnode/inc/mgmtTable.h +++ b/src/mnode/inc/mgmtTable.h @@ -30,23 +30,10 @@ void mgmtCleanUpTables(); STableInfo* mgmtGetTable(char *tableId); STableInfo* mgmtGetTableByPos(uint32_t dnodeIp, int32_t vnode, int32_t sid); -int32_t mgmtGetTableMeta(SDbObj *pDb, STableInfo *pTable, STableMeta *pMeta, bool usePublicIp); - -int32_t mgmtRetrieveMetricMeta(void *pConn, char **pStart, SSuperTableMetaMsg *pInfo); -int32_t mgmtCreateTable(SCreateTableMsg *pCreate, int32_t contLen, void *thandle, bool isGetMeta); -int32_t mgmtDropTable(SDbObj *pDb, char *tableId, int32_t ignore); -int32_t mgmtAlterTable(SDbObj *pDb, SAlterTableMsg *pAlter); +int32_t mgmtGetTableMeta(SDbObj *pDb, STableInfo *pTable, STableMetaMsg *pMeta, bool usePublicIp); void mgmtAddTableIntoSuperTable(SSuperTableObj *pStable); void mgmtRemoveTableFromSuperTable(SSuperTableObj *pStable); -void mgmtSetTableDirty(STableInfo *pTable, bool isDirty); - -SMDDropTableMsg *mgmtBuildRemoveTableMsg(STableInfo *pTable); -SDRemoveSuperTableMsg *mgmtBuildRemoveSuperTableMsg(STableInfo *pTable); - -void mgmtProcessGetTableMeta(STableInfo *pTable, void *thandle); -void mgmtProcessCreateTable(SVgObj *pVgroup, SCreateTableMsg *pCreate, int32_t contLen, void *thandle, bool isGetMeta); -void mgmtProcessCreateVgroup(SCreateTableMsg *pCreate, int32_t contLen, void *thandle, bool isGetMeta); #ifdef __cplusplus } diff --git a/src/mnode/inc/mgmtVgroup.h b/src/mnode/inc/mgmtVgroup.h index 975a10dc47ca2303b52ef02c637358370577ea10..b7c68b5f8062240fd1f7c71f1e789d3a31f08630 100644 --- 
a/src/mnode/inc/mgmtVgroup.h +++ b/src/mnode/inc/mgmtVgroup.h @@ -29,19 +29,18 @@ void mgmtCleanUpVgroups(); SVgObj *mgmtGetVgroup(int32_t vgId); SVgObj *mgmtGetVgroupByVnode(uint32_t dnode, int32_t vnode); -SVgObj *mgmtCreateVgroup(SDbObj *pDb); -int32_t mgmtDropVgroup(SDbObj *pDb, SVgObj *pVgroup); +void mgmtCreateVgroup(SQueuedMsg *pMsg); +void mgmtDropVgroup(SVgObj *pVgroup, void *ahandle); void mgmtUpdateVgroup(SVgObj *pVgroup); +void mgmtUpdateVgroupIp(SDnodeObj *pDnode); void mgmtSetVgroupIdPool(); SVgObj *mgmtGetAvailableVgroup(SDbObj *pDb); void mgmtAddTableIntoVgroup(SVgObj *pVgroup, STableInfo *pTable); void mgmtRemoveTableFromVgroup(SVgObj *pVgroup, STableInfo *pTable); - -SMDCreateVnodeMsg *mgmtBuildCreateVnodeMsg(SVgObj *pVgroup, int32_t vnode); -void mgmtSendCreateVnodeMsg(SVgObj *pVgroup, int32_t vnode, SRpcIpSet *ipSet, void *ahandle); -void mgmtSendCreateVgroupMsg(SVgObj *pVgroup, void *ahandle); +void mgmtSendCreateVnodeMsg(SVgObj *pVgroup, SRpcIpSet *ipSet, void *ahandle); +void mgmtSendDropVnodeMsg(int32_t vgId, SRpcIpSet *ipSet, void *ahandle); SRpcIpSet mgmtGetIpSetFromVgroup(SVgObj *pVgroup); SRpcIpSet mgmtGetIpSetFromIp(uint32_t ip); diff --git a/src/mnode/src/mgmtAcct.c b/src/mnode/src/mgmtAcct.c index 4c25449c22ad0ed48d55726f8792851be120c22f..b1ee72386ef9bc36635c982685e7885bd1ab5f93 100644 --- a/src/mnode/src/mgmtAcct.c +++ b/src/mnode/src/mgmtAcct.c @@ -24,7 +24,7 @@ void (*mgmtCleanUpAcctsFp)() = NULL; SAcctObj *(*mgmtGetAcctFp)(char *acctName) = NULL; int32_t (*mgmtCheckUserLimitFp)(SAcctObj *pAcct) = NULL; int32_t (*mgmtCheckDbLimitFp)(SAcctObj *pAcct) = NULL; -int32_t (*mgmtCheckTimeSeriesLimitFp)(SAcctObj *pAcct, int32_t numOfTimeSeries) = NULL; +int32_t (*mgmtCheckTableLimitFp)(SAcctObj *pAcct, int32_t numOfTimeSeries) = NULL; int32_t mgmtAddDbIntoAcct(SAcctObj *pAcct, SDbObj *pDb) { pthread_mutex_lock(&pAcct->mutex); @@ -137,8 +137,8 @@ int32_t mgmtCheckDbLimit(SAcctObj *pAcct) { } int32_t mgmtCheckTableLimit(SAcctObj *pAcct, 
int32_t numOfTimeSeries) { - if (mgmtCheckTimeSeriesLimitFp) { - return (*mgmtCheckTimeSeriesLimitFp)(pAcct, numOfTimeSeries); + if (mgmtCheckTableLimitFp) { + return (*mgmtCheckTableLimitFp)(pAcct, numOfTimeSeries); } else { return 0; } diff --git a/src/mnode/src/mgmtBalance.c b/src/mnode/src/mgmtBalance.c index 81f410548e13e606cea3375434e1635c962824e3..cb4857fa2c9bf424c8989fd6aa79f44d88d5dff7 100644 --- a/src/mnode/src/mgmtBalance.c +++ b/src/mnode/src/mgmtBalance.c @@ -20,6 +20,7 @@ int32_t (*mgmtInitBalanceFp)() = NULL; void (*mgmtCleanupBalanceFp)() = NULL; +void (*mgmtStartBalanceTimerFp)(int32_t afterMs) = NULL; int32_t (*mgmtAllocVnodesFp)(SVgObj *pVgroup) = NULL; int32_t mgmtInitBalance() { @@ -36,33 +37,28 @@ void mgmtCleanupBalance() { } } +void mgmtStartBalanceTimer(int32_t afterMs) { + if (mgmtStartBalanceTimerFp) { + (*mgmtStartBalanceTimerFp)(afterMs); + } +} + int32_t mgmtAllocVnodes(SVgObj *pVgroup) { if (mgmtAllocVnodesFp) { - return mgmtAllocVnodesFp(pVgroup); + return (*mgmtAllocVnodesFp)(pVgroup); } - SDnodeObj *pDnode = mgmtGetDnode(0); + SDnodeObj *pDnode = mgmtGetDnode(1); if (pDnode == NULL) return TSDB_CODE_OTHERS; - int32_t selectedVnode = -1; - int32_t lastAllocVode = pDnode->lastAllocVnode; - - for (int32_t i = 0; i < pDnode->numOfVnodes; i++) { - int32_t vnode = (i + lastAllocVode) % pDnode->numOfVnodes; - if (pDnode->vload[vnode].vgId == 0 && pDnode->vload[vnode].status == TSDB_VN_STATUS_OFFLINE) { - selectedVnode = vnode; - break; - } - } - - if (selectedVnode == -1) { - mError("vgroup:%d alloc vnode failed, free vnodes:%d", pVgroup->vgId, pDnode->numOfFreeVnodes); - return -1; + if (pDnode->openVnodes < pDnode->numOfTotalVnodes) { + pVgroup->vnodeGid[0].dnodeId = pDnode->dnodeId; + pVgroup->vnodeGid[0].privateIp = pDnode->privateIp; + pVgroup->vnodeGid[0].publicIp = pDnode->publicIp; + mTrace("dnode:%d, alloc one vnode to vgroup", pDnode->dnodeId); + return TSDB_CODE_SUCCESS; } else { - mTrace("vgroup:%d allocate vnode:%d, last 
allocated vnode:%d", pVgroup->vgId, selectedVnode, lastAllocVode); - pVgroup->vnodeGid[0].vnode = selectedVnode; - pDnode->lastAllocVnode = selectedVnode + 1; - if (pDnode->lastAllocVnode >= pDnode->numOfVnodes) pDnode->lastAllocVnode = 0; - return 0; + mError("dnode:%d, failed to alloc vnode to vgroup", pDnode->dnodeId); + return TSDB_CODE_NO_ENOUGH_DNODES; } } diff --git a/src/mnode/src/mgmtChildTable.c b/src/mnode/src/mgmtChildTable.c index d27fa8010227238bfc5899274c54178b1978a91c..d5f0e7c85378d2b775a323a3d1a1bad783088a3f 100644 --- a/src/mnode/src/mgmtChildTable.c +++ b/src/mnode/src/mgmtChildTable.c @@ -16,7 +16,6 @@ #define _DEFAULT_SOURCE #include "os.h" #include "taosmsg.h" -#include "tschemautil.h" #include "tscompression.h" #include "tskiplist.h" #include "ttime.h" @@ -34,7 +33,7 @@ #include "mgmtTable.h" #include "mgmtVgroup.h" -void *tsChildTableSdb; +void *tsChildTableSdb; int32_t tsChildTableUpdateSize; void *(*mgmtChildTableActionFp[SDB_MAX_ACTION_TYPES])(void *row, char *str, int32_t size, int32_t *ssize); @@ -51,12 +50,12 @@ static void mgmtDestroyChildTable(SChildTableObj *pTable) { } static void mgmtChildTableActionInit() { - mgmtChildTableActionFp[SDB_TYPE_INSERT] = mgmtChildTableActionInsert; - mgmtChildTableActionFp[SDB_TYPE_DELETE] = mgmtChildTableActionDelete; - mgmtChildTableActionFp[SDB_TYPE_UPDATE] = mgmtChildTableActionUpdate; - mgmtChildTableActionFp[SDB_TYPE_ENCODE] = mgmtChildTableActionEncode; - mgmtChildTableActionFp[SDB_TYPE_DECODE] = mgmtChildTableActionDecode; - mgmtChildTableActionFp[SDB_TYPE_RESET] = mgmtChildTableActionReset; + mgmtChildTableActionFp[SDB_TYPE_INSERT] = mgmtChildTableActionInsert; + mgmtChildTableActionFp[SDB_TYPE_DELETE] = mgmtChildTableActionDelete; + mgmtChildTableActionFp[SDB_TYPE_UPDATE] = mgmtChildTableActionUpdate; + mgmtChildTableActionFp[SDB_TYPE_ENCODE] = mgmtChildTableActionEncode; + mgmtChildTableActionFp[SDB_TYPE_DECODE] = mgmtChildTableActionDecode; + mgmtChildTableActionFp[SDB_TYPE_RESET] = 
mgmtChildTableActionReset; mgmtChildTableActionFp[SDB_TYPE_DESTROY] = mgmtChildTableActionDestroy; } @@ -77,26 +76,26 @@ void *mgmtChildTableActionInsert(void *row, char *str, int32_t size, int32_t *ss SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); if (pVgroup == NULL) { - mError("id:%s not in vgroup:%d", pTable->tableId, pTable->vgId); + mError("ctable:%s, not in vgroup:%d", pTable->tableId, pTable->vgId); return NULL; } SDbObj *pDb = mgmtGetDb(pVgroup->dbName); if (pDb == NULL) { - mError("vgroup:%d not in DB:%s", pVgroup->vgId, pVgroup->dbName); + mError("ctable:%s, vgroup:%d not in db:%s", pTable->tableId, pVgroup->vgId, pVgroup->dbName); return NULL; } SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct); if (pAcct == NULL) { - mError("account not exists"); + mError("ctable:%s, account:%s not exists", pTable->tableId, pDb->cfg.acct); return NULL; } if (!sdbMaster) { int32_t sid = taosAllocateId(pVgroup->idPool); if (sid != pTable->sid) { - mError("sid:%d is not matched from the master:%d", sid, pTable->sid); + mError("ctable:%s, sid:%d is not matched from the master:%d", pTable->tableId, sid, pTable->sid); return NULL; } } @@ -123,19 +122,18 @@ void *mgmtChildTableActionDelete(void *row, char *str, int32_t size, int32_t *ss SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); if (pVgroup == NULL) { - mError("id:%s not in vgroup:%d", pTable->tableId, pTable->vgId); return NULL; } SDbObj *pDb = mgmtGetDb(pVgroup->dbName); if (pDb == NULL) { - mError("vgroup:%d not in DB:%s", pVgroup->vgId, pVgroup->dbName); + mError("ctable:%s, vgroup:%d not in DB:%s", pTable->tableId, pVgroup->vgId, pVgroup->dbName); return NULL; } SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct); if (pAcct == NULL) { - mError("account not exists"); + mError("ctable:%s, account:%s not exists", pTable->tableId, pDb->cfg.acct); return NULL; } @@ -198,7 +196,7 @@ int32_t mgmtInitChildTables() { tsChildTableUpdateSize = tObj.updateEnd - (int8_t *)&tObj; tsChildTableSdb = sdbOpenTable(tsMaxTables, 
tsChildTableUpdateSize, - "ctables", SDB_KEYTYPE_STRING, tsMgmtDirectory, mgmtChildTableAction); + "ctables", SDB_KEYTYPE_STRING, tsMnodeDir, mgmtChildTableAction); if (tsChildTableSdb == NULL) { mError("failed to init child table data"); return -1; @@ -272,36 +270,34 @@ void mgmtCleanUpChildTables() { sdbCloseTable(tsChildTableSdb); } -static void *mgmtBuildCreateChildTableMsg(SChildTableObj *pTable, SVgObj *pVgroup, void *pTagData, int32_t tagDataLen) { - int32_t totalCols = pTable->superTable->numOfColumns + pTable->superTable->numOfTags; - int32_t contLen = sizeof(SDMCreateTableMsg) + totalCols * sizeof(SSchema) + tagDataLen; +void *mgmtBuildCreateChildTableMsg(SCMCreateTableMsg *pMsg, SChildTableObj *pTable) { + char *pTagData = pMsg->schema + TSDB_TABLE_ID_LEN + 1; + int32_t tagDataLen = htonl(pMsg->contLen) - sizeof(SCMCreateTableMsg) - TSDB_TABLE_ID_LEN - 1; + int32_t totalCols = pTable->superTable->numOfColumns + pTable->superTable->numOfTags; + int32_t contLen = sizeof(SMDCreateTableMsg) + totalCols * sizeof(SSchema) + tagDataLen; - SDMCreateTableMsg *pCreateTable = rpcMallocCont(contLen); - if (pCreateTable == NULL) { + SMDCreateTableMsg *pCreate = rpcMallocCont(contLen); + if (pCreate == NULL) { + terrno = TSDB_CODE_SERV_OUT_OF_MEMORY; return NULL; } - memcpy(pCreateTable->tableId, pTable->tableId, TSDB_TABLE_ID_LEN); - memcpy(pCreateTable->superTableId, pTable->superTable->tableId, TSDB_TABLE_ID_LEN); - pCreateTable->tableType = pTable->type; - pCreateTable->numOfColumns = htons(pTable->superTable->numOfColumns); - pCreateTable->numOfTags = htons(pTable->superTable->numOfTags); - pCreateTable->sid = htonl(pTable->sid); - pCreateTable->sversion = htonl(pTable->superTable->sversion); - pCreateTable->tagDataLen = htonl(tagDataLen); - pCreateTable->sqlDataLen = 0; - pCreateTable->contLen = htonl(contLen); - pCreateTable->numOfVPeers = htonl(pVgroup->numOfVnodes); - pCreateTable->uid = htobe64(pTable->uid); - pCreateTable->superTableUid = 
htobe64(pTable->superTable->uid); - pCreateTable->createdTime = htobe64(pTable->createdTime); - - for (int i = 0; i < pVgroup->numOfVnodes; ++i) { - pCreateTable->vpeerDesc[i].ip = htonl(pVgroup->vnodeGid[i].ip); - pCreateTable->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); - } - - SSchema *pSchema = (SSchema *) pCreateTable->data; + memcpy(pCreate->tableId, pTable->tableId, TSDB_TABLE_ID_LEN + 1); + memcpy(pCreate->superTableId, pTable->superTable->tableId, TSDB_TABLE_ID_LEN + 1); + pCreate->contLen = htonl(contLen); + pCreate->vgId = htonl(pTable->vgId); + pCreate->tableType = pTable->type; + pCreate->numOfColumns = htons(pTable->superTable->numOfColumns); + pCreate->numOfTags = htons(pTable->superTable->numOfTags); + pCreate->sid = htonl(pTable->sid); + pCreate->sversion = htonl(pTable->superTable->sversion); + pCreate->tagDataLen = htonl(tagDataLen); + pCreate->sqlDataLen = 0; + pCreate->uid = htobe64(pTable->uid); + pCreate->superTableUid = htobe64(pTable->superTable->uid); + pCreate->createdTime = htobe64(pTable->createdTime); + + SSchema *pSchema = (SSchema *) pCreate->data; memcpy(pSchema, pTable->superTable->schema, totalCols * sizeof(SSchema)); for (int32_t col = 0; col < totalCols; ++col) { pSchema->bytes = htons(pSchema->bytes); @@ -309,105 +305,86 @@ static void *mgmtBuildCreateChildTableMsg(SChildTableObj *pTable, SVgObj *pVgrou pSchema++; } - memcpy(pCreateTable + sizeof(SDMCreateTableMsg) + totalCols * sizeof(SSchema), pTagData, tagDataLen); - - return pCreateTable; + memcpy(pCreate->data + totalCols * sizeof(SSchema), pTagData, tagDataLen); + return pCreate; } -int32_t mgmtCreateChildTable(SCreateTableMsg *pCreate, int32_t contLen, SVgObj *pVgroup, int32_t sid, - SDMCreateTableMsg **pDCreateOut, STableInfo **pTableOut) { +void* mgmtCreateChildTable(SCMCreateTableMsg *pCreate, SVgObj *pVgroup, int32_t tid) { int32_t numOfTables = sdbGetNumOfRows(tsChildTableSdb); if (numOfTables >= tsMaxTables) { - mError("table:%s, numOfTables:%d exceed 
maxTables:%d", pCreate->tableId, numOfTables, tsMaxTables); - return TSDB_CODE_TOO_MANY_TABLES; + mError("ctable:%s, numOfTables:%d exceed maxTables:%d", pCreate->tableId, numOfTables, tsMaxTables); + terrno = TSDB_CODE_TOO_MANY_TABLES; + return NULL; } - char *pTagData = (char *) pCreate->schema; // it is a tag key + char *pTagData = (char *) pCreate->schema; // it is a tag key SSuperTableObj *pSuperTable = mgmtGetSuperTable(pTagData); if (pSuperTable == NULL) { - mError("table:%s, corresponding super table does not exist", pCreate->tableId); - return TSDB_CODE_INVALID_TABLE; + mError("ctable:%s, corresponding super table does not exist", pCreate->tableId); + terrno = TSDB_CODE_INVALID_TABLE; + return NULL; } SChildTableObj *pTable = (SChildTableObj *) calloc(sizeof(SChildTableObj), 1); if (pTable == NULL) { - mError("table:%s, failed to alloc memory", pCreate->tableId); - return TSDB_CODE_SERV_OUT_OF_MEMORY; + mError("ctable:%s, failed to alloc memory", pCreate->tableId); + terrno = TSDB_CODE_SERV_OUT_OF_MEMORY; + return NULL; } + strcpy(pTable->tableId, pCreate->tableId); strcpy(pTable->superTableId, pSuperTable->tableId); - pTable->type = TSDB_TABLE_TYPE_CHILD_TABLE; + pTable->type = TSDB_CHILD_TABLE; pTable->createdTime = taosGetTimestampMs(); - pTable->superTable = pSuperTable; - pTable->vgId = pVgroup->vgId; - pTable->sid = sid; pTable->uid = (((uint64_t) pTable->vgId) << 40) + ((((uint64_t) pTable->sid) & ((1ul << 24) - 1ul)) << 16) + ((uint64_t) sdbGetVersion() & ((1ul << 16) - 1ul)); + pTable->sid = tid; + pTable->vgId = pVgroup->vgId; + pTable->superTable = pSuperTable; if (sdbInsertRow(tsChildTableSdb, pTable, 0) < 0) { - mError("table:%s, update sdb error", pCreate->tableId); - return TSDB_CODE_SDB_ERROR; - } - - pTagData += (TSDB_TABLE_ID_LEN + 1); - int32_t tagDataLen = contLen - sizeof(SCreateTableMsg) - TSDB_TABLE_ID_LEN - 1; - *pDCreateOut = mgmtBuildCreateChildTableMsg(pTable, pVgroup, pTagData, tagDataLen); - if (*pDCreateOut == NULL) { - 
mError("table:%s, failed to build create table message", pCreate->tableId); - return TSDB_CODE_SERV_OUT_OF_MEMORY; + free(pTable); + mError("ctable:%s, update sdb error", pCreate->tableId); + terrno = TSDB_CODE_SDB_ERROR; + return NULL; } - *pTableOut = (STableInfo *) pTable; - - mTrace("table:%s, create table in vgroup, vgroup:%d sid:%d vnode:%d uid:%" PRIu64 , - pTable->tableId, pVgroup->vgId, sid, pVgroup->vnodeGid[0].vnode, pTable->uid); - - return TSDB_CODE_SUCCESS; + mTrace("ctable:%s, create ctable in vgroup, uid:%" PRIu64 , pTable->tableId, pTable->uid); + return pTable; } -int32_t mgmtDropChildTable(SDbObj *pDb, SChildTableObj *pTable) { +int32_t mgmtDropChildTable(SQueuedMsg *newMsg, SChildTableObj *pTable) { SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); if (pVgroup == NULL) { - mError("table:%s, failed to drop child table, vgroup not exist", pTable->tableId); + mError("ctable:%s, failed to drop child table, vgroup not exist", pTable->tableId); return TSDB_CODE_OTHERS; } - SMDDropTableMsg *pRemove = rpcMallocCont(sizeof(SMDDropTableMsg)); - if (pRemove == NULL) { - mError("table:%s, failed to drop child table, no enough memory", pTable->tableId); + SMDDropTableMsg *pDrop = rpcMallocCont(sizeof(SMDDropTableMsg)); + if (pDrop == NULL) { + mError("ctable:%s, failed to drop child table, no enough memory", pTable->tableId); return TSDB_CODE_SERV_OUT_OF_MEMORY; } - strcpy(pRemove->tableId, pTable->tableId); - pRemove->sid = htonl(pTable->sid); - pRemove->uid = htobe64(pTable->uid); - - pRemove->numOfVPeers = htonl(pVgroup->numOfVnodes); - for (int i = 0; i < pVgroup->numOfVnodes; ++i) { - pRemove->vpeerDesc[i].ip = htonl(pVgroup->vnodeGid[i].ip); - pRemove->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); - } + strcpy(pDrop->tableId, pTable->tableId); + pDrop->vgId = htonl(pTable->vgId); + pDrop->contLen = htonl(sizeof(SMDDropTableMsg)); + pDrop->sid = htonl(pTable->sid); + pDrop->uid = htobe64(pTable->uid); SRpcIpSet ipSet = 
mgmtGetIpSetFromVgroup(pVgroup); - mTrace("table:%s, send drop table msg", pRemove->tableId); + mTrace("ctable:%s, send drop table msg", pDrop->tableId); SRpcMsg rpcMsg = { - .handle = 0, - .pCont = pRemove, + .handle = newMsg, + .pCont = pDrop, .contLen = sizeof(SMDDropTableMsg), .code = 0, .msgType = TSDB_MSG_TYPE_MD_DROP_TABLE }; - mgmtSendMsgToDnode(&ipSet, &rpcMsg); - - if (sdbDeleteRow(tsChildTableSdb, pTable) < 0) { - mError("table:%s, update ctables sdb error", pTable->tableId); - return TSDB_CODE_SDB_ERROR; - } - if (pVgroup->numOfTables <= 0) { - mgmtDropVgroup(pDb, pVgroup); - } + newMsg->ahandle = pTable; + mgmtSendMsgToDnode(&ipSet, &rpcMsg); return TSDB_CODE_SUCCESS; } @@ -417,6 +394,7 @@ void* mgmtGetChildTable(char *tableId) { } int32_t mgmtModifyChildTableTagValueByName(SChildTableObj *pTable, char *tagName, char *nContent) { +// TODO: send message to dnode // int32_t col = mgmtFindSuperTableTagIndex(pTable->superTable, tagName); // if (col < 0 || col > pTable->superTable->numOfTags) { // return TSDB_CODE_APP_ERROR; @@ -464,17 +442,17 @@ int32_t mgmtModifyChildTableTagValueByName(SChildTableObj *pTable, char *tagName return 0; } -int32_t mgmtGetChildTableMeta(SDbObj *pDb, SChildTableObj *pTable, STableMeta *pMeta, bool usePublicIp) { +int32_t mgmtGetChildTableMeta(SDbObj *pDb, SChildTableObj *pTable, STableMetaMsg *pMeta, bool usePublicIp) { pMeta->uid = htobe64(pTable->uid); pMeta->sid = htonl(pTable->sid); - pMeta->vgid = htonl(pTable->vgId); + pMeta->vgId = htonl(pTable->vgId); pMeta->sversion = htons(pTable->superTable->sversion); pMeta->precision = pDb->cfg.precision; pMeta->numOfTags = pTable->superTable->numOfTags; pMeta->numOfColumns = htons(pTable->superTable->numOfColumns); pMeta->tableType = pTable->type; - pMeta->contLen = sizeof(STableMeta) + mgmtSetSchemaFromSuperTable(pMeta->schema, pTable->superTable); - strcpy(pMeta->tableId, pTable->tableId); + pMeta->contLen = sizeof(STableMetaMsg) + mgmtSetSchemaFromSuperTable(pMeta->schema, 
pTable->superTable); + strncpy(pMeta->tableId, pTable->tableId, tListLen(pTable->tableId)); SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); if (pVgroup == NULL) { @@ -483,13 +461,36 @@ int32_t mgmtGetChildTableMeta(SDbObj *pDb, SChildTableObj *pTable, STableMeta *p for (int32_t i = 0; i < TSDB_VNODES_SUPPORT; ++i) { if (usePublicIp) { pMeta->vpeerDesc[i].ip = pVgroup->vnodeGid[i].publicIp; - pMeta->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); } else { - pMeta->vpeerDesc[i].ip = pVgroup->vnodeGid[i].ip; - pMeta->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); + pMeta->vpeerDesc[i].ip = pVgroup->vnodeGid[i].privateIp; } + pMeta->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); } pMeta->numOfVpeers = pVgroup->numOfVnodes; return TSDB_CODE_SUCCESS; } + +void mgmtDropAllChildTables(SDbObj *pDropDb) { + void *pNode = NULL; + void *pLastNode = NULL; + int32_t numOfTables = 0; + int32_t dbNameLen = strlen(pDropDb->name); + SChildTableObj *pTable = NULL; + + while (1) { + pNode = sdbFetchRow(tsChildTableSdb, pNode, (void **)&pTable); + if (pTable == NULL) { + break; + } + + if (strncmp(pDropDb->name, pTable->tableId, dbNameLen) == 0) { + sdbDeleteRow(tsChildTableSdb, pTable); + pNode = pLastNode; + numOfTables ++; + continue; + } + } + + mTrace("db:%s, all child tables:%d is dropped", pDropDb->name, numOfTables); +} \ No newline at end of file diff --git a/src/mnode/src/mgmtDClient.c b/src/mnode/src/mgmtDClient.c index 4670663222dde7b01521976e6ba7912f2247c701..da11ad20811734d158aeaa12733ddae178402273 100644 --- a/src/mnode/src/mgmtDClient.c +++ b/src/mnode/src/mgmtDClient.c @@ -78,149 +78,8 @@ static void mgmtProcessRspFromDnode(SRpcMsg *rpcMsg) { if (mgmtProcessDnodeRspFp[rpcMsg->msgType]) { (*mgmtProcessDnodeRspFp[rpcMsg->msgType])(rpcMsg); } else { - dError("%s is not processed", taosMsg[rpcMsg->msgType]); + mError("%s is not processed in dclient", taosMsg[rpcMsg->msgType]); } rpcFreeCont(rpcMsg->pCont); } - - -//static void 
mgmtProcessCreateTableRsp(SRpcMsg *rpcMsg) { -// mTrace("create table rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -// if (rpcMsg->handle == NULL) return; -// -// SProcessInfo *info = rpcMsg->handle; -// assert(info->type == TSDB_PROCESS_CREATE_TABLE || info->type == TSDB_PROCESS_CREATE_TABLE_GET_META); -// -// STableInfo *pTable = info->ahandle; -// if (rpcMsg->code != TSDB_CODE_SUCCESS) { -// mError("table:%s, failed to create in dnode, code:%d, set it dirty", pTable->tableId, rpcMsg->code); -// mgmtSetTableDirty(pTable, true); -// } else { -// mTrace("table:%s, created in dnode", pTable->tableId); -// mgmtSetTableDirty(pTable, false); -// } -// -// if (rpcMsg->code != TSDB_CODE_SUCCESS) { -// SRpcMsg rpcRsp = {.handle = info->thandle, .pCont = NULL, .contLen = 0, .code = rpcMsg->code, .msgType = 0}; -// rpcSendResponse(&rpcMsg); -// } else { -// if (info->type == TSDB_PROCESS_CREATE_TABLE_GET_META) { -// mTrace("table:%s, start to process get meta", pTable->tableId); -// mgmtProcessGetTableMeta(pTable, rpcMsg->handle); -// } else { -// SRpcMsg rpcRsp = {.handle = info->thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; -// rpcSendResponse(&rpcMsg); -// } -// } -// -// free(info); -//} -// -//static void mgmtProcessDropTableRsp(SRpcMsg *rpcMsg) { -// mTrace("drop table rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -//} -// -//static void mgmtProcessAlterTableRsp(SRpcMsg *rpcMsg) { -// mTrace("alter table rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -//} -// -//static void mgmtProcessCreateVnodeRsp(SRpcMsg *rpcMsg) { -// mTrace("create vnode rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -// if (rpcMsg->handle == NULL) return; -// -// SProcessInfo *info = rpcMsg->handle; -// assert(info->type == TSDB_PROCESS_CREATE_VGROUP || info->type == TSDB_PROCESS_CREATE_VGROUP_GET_META); -// -// info->received++; -// SVgObj *pVgroup = info->ahandle; -// -// bool isGetMeta = false; -// if 
(info->type == TSDB_PROCESS_CREATE_VGROUP_GET_META) { -// isGetMeta = true; -// } -// -// mTrace("vgroup:%d, received:%d numOfVnodes:%d", pVgroup->vgId, info->received, pVgroup->numOfVnodes); -// if (info->received == pVgroup->numOfVnodes) { -// mgmtProcessCreateTable(pVgroup, info->cont, info->contLen, info->thandle, isGetMeta); -// free(info); -// } -//} -// -//static void mgmtProcessDropVnodeRsp(SRpcMsg *rpcMsg) { -// mTrace("drop vnode rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -//} -// -//static void mgmtProcessAlterVnodeRsp(SRpcMsg *rpcMsg) { -// mTrace("alter vnode rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -//} -// -//static void mgmtProcessDropStableRsp(SRpcMsg *rpcMsg) { -// mTrace("drop stable rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -//} -// -//static void mgmtProcessAlterStreamRsp(SRpcMsg *rpcMsg) { -// mTrace("alter stream rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -//} -// -//static void mgmtProcessConfigDnodeRsp(SRpcMsg *rpcMsg) { -// mTrace("config dnode rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); -//} -// - - -// -//void mgmtSendAlterStreamMsg(STableInfo *pTable, SRpcIpSet *ipSet, void *ahandle) { -// mTrace("table:%s, send alter stream msg, ahandle:%p", pTable->tableId, pTable->sid, ahandle); -//} -// -//void mgmtSendDropVnodeMsg(int32_t vgId, int32_t vnode, SRpcIpSet *ipSet, void *ahandle) { -// mTrace("vnode:%d send free vnode msg, ahandle:%p", vnode, ahandle); -// SMDDropVnodeMsg *pDrop = rpcMallocCont(sizeof(SMDDropVnodeMsg)); -// SRpcMsg rpcMsg = { -// .handle = ahandle, -// .pCont = pDrop, -// .contLen = pDrop ? 
sizeof(SMDDropVnodeMsg) : 0, -// .code = 0, -// .msgType = TSDB_MSG_TYPE_MD_DROP_VNODE -// }; -// rpcSendRequest(tsMgmtDClientRpc, ipSet, &rpcMsg); -//} -// - -//// -////int32_t mgmtCfgDynamicOptions(SDnodeObj *pDnode, char *msg) { -//// char *option, *value; -//// int32_t olen, valen; -//// -//// paGetToken(msg, &option, &olen); -//// if (strncasecmp(option, "unremove", 8) == 0) { -//// mgmtSetDnodeUnRemove(pDnode); -//// return TSDB_CODE_SUCCESS; -//// } else if (strncasecmp(option, "score", 5) == 0) { -//// paGetToken(option + olen + 1, &value, &valen); -//// if (valen > 0) { -//// int32_t score = atoi(value); -//// mTrace("dnode:%s, custom score set from:%d to:%d", taosIpStr(pDnode->privateIp), pDnode->customScore, score); -//// pDnode->customScore = score; -//// mgmtUpdateDnode(pDnode); -//// //mgmtStartBalanceTimer(15); -//// } -//// return TSDB_CODE_INVALID_SQL; -//// } else if (strncasecmp(option, "bandwidth", 9) == 0) { -//// paGetToken(msg, &value, &valen); -//// if (valen > 0) { -//// int32_t bandwidthMb = atoi(value); -//// if (bandwidthMb >= 0 && bandwidthMb < 10000000) { -//// mTrace("dnode:%s, bandwidth(Mb) set from:%d to:%d", taosIpStr(pDnode->privateIp), pDnode->bandwidthMb, bandwidthMb); -//// pDnode->bandwidthMb = bandwidthMb; -//// mgmtUpdateDnode(pDnode); -//// return TSDB_CODE_SUCCESS; -//// } -//// } -//// return TSDB_CODE_INVALID_SQL; -//// } -//// -//// return -1; -////} -//// diff --git a/src/mnode/src/mgmtDServer.c b/src/mnode/src/mgmtDServer.c index 27cbf230d55e5b5c7d8f4bdb0795a63e84f08278..177a45764ccd773a3ceb549fd0dccb624c4257c3 100644 --- a/src/mnode/src/mgmtDServer.c +++ b/src/mnode/src/mgmtDServer.c @@ -41,7 +41,7 @@ static void *tsMgmtDServerRpc; int32_t mgmtInitDServer() { SRpcInit rpcInit = {0}; rpcInit.localIp = tsAnyIp ? 
"0.0.0.0" : tsPrivateIp;; - rpcInit.localPort = tsMgmtDnodePort; + rpcInit.localPort = tsMnodeDnodePort; rpcInit.label = "MND-DS"; rpcInit.numOfThreads = 1; rpcInit.cfp = mgmtProcessMsgFromDnode; @@ -76,7 +76,7 @@ static void mgmtProcessMsgFromDnode(SRpcMsg *rpcMsg) { if (mgmtProcessDnodeMsgFp[rpcMsg->msgType]) { (*mgmtProcessDnodeMsgFp[rpcMsg->msgType])(rpcMsg); } else { - mError("%s is not processed", taosMsg[rpcMsg->msgType]); + mError("%s is not processed in dserver", taosMsg[rpcMsg->msgType]); } rpcFreeCont(rpcMsg->pCont); @@ -85,292 +85,3 @@ static void mgmtProcessMsgFromDnode(SRpcMsg *rpcMsg) { static int mgmtDServerRetrieveAuth(char *user, char *spi, char *encrypt, char *secret, char *ckey) { return TSDB_CODE_SUCCESS; } - -// -// -//static void mgmtProcessTableCfgMsg(int8_t msgType, int8_t *pCont, int32_t contLen, void *thandle) { -// STableCfgMsg *pCfg = (STableCfgMsg *) pCont; -// pCfg->dnode = htonl(pCfg->dnode); -// pCfg->vnode = htonl(pCfg->vnode); -// pCfg->sid = htonl(pCfg->sid); -// mTrace("dnode:%s, vnode:%d, sid:%d, receive table config msg", taosIpStr(pCfg->dnode), pCfg->vnode, pCfg->sid); -// -// if (!sdbMaster) { -// mError("dnode:%s, vnode:%d, sid:%d, not master, redirect it", taosIpStr(pCfg->dnode), pCfg->vnode, pCfg->sid); -// mgmtSendRspToDnode(thandle, msgType + 1, TSDB_CODE_REDIRECT, NULL, 0); -// return; -// } -// -// STableInfo *pTable = mgmtGetTableByPos(pCfg->dnode, pCfg->vnode, pCfg->sid); -// if (pTable == NULL) { -// mError("dnode:%s, vnode:%d, sid:%d, table not found", taosIpStr(pCfg->dnode), pCfg->vnode, pCfg->sid); -// mgmtSendRspToDnode(thandle, msgType + 1, TSDB_CODE_INVALID_TABLE, NULL, 0); -// return; -// } -// -// mgmtSendRspToDnode(thandle, msgType + 1, TSDB_CODE_SUCCESS, NULL, 0); -// -// //TODO -// SRpcIpSet ipSet = mgmtGetIpSetFromIp(pCfg->dnode); -// mgmtSendCreateTableMsg(NULL, &ipSet, NULL); -//} -// -//static void mgmtProcessVnodeCfgMsg(int8_t msgType, int8_t *pCont, int32_t contLen, void *pConn) { -// if 
(!sdbMaster) { -// mgmtSendRspToDnode(pConn, msgType + 1, TSDB_CODE_REDIRECT, NULL, 0); -// return; -// } -// -// SVpeerCfgMsg *pCfg = (SVpeerCfgMsg *) pCont; -// pCfg->dnode = htonl(pCfg->dnode); -// pCfg->vnode = htonl(pCfg->vnode); -// -// SVgObj *pVgroup = mgmtGetVgroupByVnode(pCfg->dnode, pCfg->vnode); -// if (pVgroup == NULL) { -// mTrace("dnode:%s, vnode:%d, no vgroup info", taosIpStr(pCfg->dnode), pCfg->vnode); -// mgmtSendRspToDnode(pConn, msgType + 1, TSDB_CODE_NOT_ACTIVE_VNODE, NULL, 0); -// return; -// } -// -// mgmtSendRspToDnode(pConn, msgType + 1, TSDB_CODE_SUCCESS, NULL, 0); -// -// SRpcIpSet ipSet = mgmtGetIpSetFromIp(pCfg->dnode); -// mgmtSendCreateVnodeMsg(pVgroup, pCfg->vnode, &ipSet, NULL); -//} -// -//static void mgmtProcessCreateTableRsp(int8_t msgType, int8_t *pCont, int32_t contLen, void *thandle, int32_t code) { -// mTrace("create table rsp received, thandle:%p code:%d", thandle, code); -// if (thandle == NULL) return; -// -// SProcessInfo *info = thandle; -// assert(info->type == TSDB_PROCESS_CREATE_TABLE || info->type == TSDB_PROCESS_CREATE_TABLE_GET_META); -// STableInfo *pTable = info->ahandle; -// -// if (code != TSDB_CODE_SUCCESS) { -// mError("table:%s, failed to create in dnode, code:%d, set it dirty", pTable->tableId); -// mgmtSetTableDirty(pTable, true); -// } else { -// mTrace("table:%s, created in dnode", pTable->tableId); -// mgmtSetTableDirty(pTable, false); -// } -// -// if (code != TSDB_CODE_SUCCESS) { -// SRpcMsg rpcMsg = {0}; -// rpcMsg.code = code; -// rpcMsg.handle = info->thandle; -// rpcSendResponse(&rpcMsg); -// } else { -// if (info->type == TSDB_PROCESS_CREATE_TABLE_GET_META) { -// mTrace("table:%s, start to process get meta", pTable->tableId); -// mgmtProcessGetTableMeta(pTable, thandle); -// } else { -// SRpcMsg rpcMsg = {0}; -// rpcMsg.code = code; -// rpcMsg.handle = info->thandle; -// rpcSendResponse(&rpcMsg); -// } -// } -// -// free(info); -//} -// - -//static void mgmtProcessRemoveTableRsp(int8_t msgType, 
int8_t *pCont, int32_t contLen, void *thandle, int32_t code) { -// mTrace("remove table rsp received, thandle:%p code:%d", thandle, code); -//} -// - -// -//static void mgmtProcessDropVnodeRsp(int8_t msgType, int8_t *pCont, int32_t contLen, void *thandle, int32_t code) { -// mTrace("free vnode rsp received, thandle:%p code:%d", thandle, code); -//} -// -//static void mgmtProcessDropStableRsp(int8_t msgType, int8_t *pCont, int32_t contLen, void *thandle, int32_t code) { -// mTrace("drop stable rsp received, thandle:%p code:%d", thandle, code); -//} -// -//static void mgmtProcessCreateVnodeRsp(int8_t msgType, int8_t *pCont, int32_t contLen, void *thandle, int32_t code) { -// mTrace("create vnode rsp received, thandle:%p code:%d", thandle, code); -// if (thandle == NULL) return; -// -// SProcessInfo *info = thandle; -// assert(info->type == TSDB_PROCESS_CREATE_VGROUP || info->type == TSDB_PROCESS_CREATE_VGROUP_GET_META); -// info->received++; -// SVgObj *pVgroup = info->ahandle; -// -// bool isGetMeta = false; -// if (info->type == TSDB_PROCESS_CREATE_VGROUP_GET_META) { -// isGetMeta = true; -// } -// -// mTrace("vgroup:%d, received:%d numOfVnodes:%d", pVgroup->vgId, info->received, pVgroup->numOfVnodes); -// if (info->received == pVgroup->numOfVnodes) { -// mgmtProcessCreateTable(pVgroup, info->cont, info->contLen, info->thandle, isGetMeta); -// free(info); -// } -//} -// -//void mgmtSendCreateVgroupMsg(SVgObj *pVgroup, void *ahandle) { -// mTrace("vgroup:%d, send create all vnodes msg, ahandle:%p", pVgroup->vgId, ahandle); -// for (int i = 0; i < pVgroup->numOfVnodes; ++i) { -// SRpcIpSet ipSet = mgmtGetIpSetFromIp(pVgroup->vnodeGid[i].ip); -// mgmtSendCreateVnodeMsg(pVgroup, pVgroup->vnodeGid[i].vnode, &ipSet, ahandle); -// } -//} -// -//void mgmtSendCreateVnodeMsg(SVgObj *pVgroup, int32_t vnode, SRpcIpSet *ipSet, void *ahandle) { -// mTrace("vgroup:%d, send create vnode:%d msg, ahandle:%p", pVgroup->vgId, vnode, ahandle); -// SMDCreateVnodeMsg *pVpeer = 
mgmtBuildCreateVnodeMsg(pVgroup, vnode); -// if (pVpeer != NULL) { -// mgmtSendMsgToDnode(ipSet, TSDB_MSG_TYPE_MD_CREATE_VNODE, pVpeer, sizeof(SMDCreateVnodeMsg), ahandle); -// } -//} -// -//void mgmtProcessMsgFromDnode(char msgType, void *pCont, int32_t contLen, void *pConn, int32_t code) { -// if (msgType < 0 || msgType >= TSDB_MSG_TYPE_MAX) { -// mError("invalid msg type:%d", msgType); -// return; -// } -// -// mTrace("msg:%d:%s is received from dnode, pConn:%p", msgType, taosMsg[(int8_t)msgType], pConn); -// -// if (msgType == TSDB_MSG_TYPE_TABLE_CFG) { -// mgmtProcessTableCfgMsg(msgType, pCont, contLen, pConn); -// } else if (msgType == TSDB_MSG_TYPE_DM_CONFIG_VNODE) { -// mgmtProcessVnodeCfgMsg(msgType, pCont, contLen, pConn); -// } else if (msgType == TSDB_MSG_TYPE_MD_CREATE_TABLE_RSP) { -// mgmtProcessCreateTableRsp(msgType, pCont, contLen, pConn, code); -// } else if (msgType == TSDB_MSG_TYPE_MD_DROP_TABLE_RSP) { -// mgmtProcessRemoveTableRsp(msgType, pCont, contLen, pConn, code); -// } else if (msgType == TSDB_MSG_TYPE_MD_CREATE_VNODE_RSP) { -// mgmtProcessCreateVnodeRsp(msgType, pCont, contLen, pConn, code); -// } else if (msgType == TSDB_MSG_TYPE_MD_DROP_VNODE_RSP) { -// mgmtProcessDropVnodeRsp(msgType, pCont, contLen, pConn, code); -// } else if (msgType == TSDB_MSG_TYPE_MD_DROP_STABLE) { -// mgmtProcessDropStableRsp(msgType, pCont, contLen, pConn, code); -// } else if (msgType == TSDB_MSG_TYPE_MD_CONFIG_DNODE_RSP) { -// } else if (msgType == TSDB_MSG_TYPE_ALTER_STREAM_RSP) { -// } else if (msgType == TSDB_MSG_TYPE_STATUS) { -// mgmtProcessDnodeStatus(msgType, pCont, contLen, pConn, code); -// } else { -// mError("%s from dnode is not processed", taosMsg[(int8_t)msgType]); -// } -// -// //rpcFreeCont(pCont); -//} -// -//void mgmtSendAlterStreamMsg(STableInfo *pTable, SRpcIpSet *ipSet, void *ahandle) { -// mTrace("table:%s, sid:%d send alter stream msg, ahandle:%p", pTable->tableId, pTable->sid, ahandle); -//} -// -//void mgmtSendDropVnodeMsg(int32_t 
vnode, SRpcIpSet *ipSet, void *ahandle) { -// mTrace("vnode:%d send free vnode msg, ahandle:%p", vnode, ahandle); -// -// SMDDropVnodeMsg *pFreeVnode = rpcMallocCont(sizeof(SMDDropVnodeMsg)); -// if (pFreeVnode != NULL) { -// pFreeVnode->vnode = htonl(vnode); -// mgmtSendMsgToDnode(ipSet, TSDB_MSG_TYPE_MD_DROP_VNODE, pFreeVnode, sizeof(SMDDropVnodeMsg), ahandle); -// } -//} -// - -//int32_t mgmtCfgDynamicOptions(SDnodeObj *pDnode, char *msg) { -// char *option, *value; -// int32_t olen, valen; -// -// paGetToken(msg, &option, &olen); -// if (strncasecmp(option, "unremove", 8) == 0) { -// mgmtSetDnodeUnRemove(pDnode); -// return TSDB_CODE_SUCCESS; -// } else if (strncasecmp(option, "score", 5) == 0) { -// paGetToken(option + olen + 1, &value, &valen); -// if (valen > 0) { -// int32_t score = atoi(value); -// mTrace("dnode:%s, custom score set from:%d to:%d", taosIpStr(pDnode->privateIp), pDnode->customScore, score); -// pDnode->customScore = score; -// mgmtUpdateDnode(pDnode); -// //mgmtStartBalanceTimer(15); -// } -// return TSDB_CODE_INVALID_SQL; -// } else if (strncasecmp(option, "bandwidth", 9) == 0) { -// paGetToken(msg, &value, &valen); -// if (valen > 0) { -// int32_t bandwidthMb = atoi(value); -// if (bandwidthMb >= 0 && bandwidthMb < 10000000) { -// mTrace("dnode:%s, bandwidth(Mb) set from:%d to:%d", taosIpStr(pDnode->privateIp), pDnode->bandwidthMb, bandwidthMb); -// pDnode->bandwidthMb = bandwidthMb; -// mgmtUpdateDnode(pDnode); -// return TSDB_CODE_SUCCESS; -// } -// } -// return TSDB_CODE_INVALID_SQL; -// } -// -// return -1; -//} -// -// -//void mgmtCleanUpDnodeInt() { -// if (mgmtCleanUpDnodeIntFp) { -// mgmtCleanUpDnodeIntFp(); -// } -//} -// -//void mgmtProcessDnodeStatus(int8_t msgType, void *pCont, int32_t contLen, void *pConn, int32_t code) { -// SStatusMsg *pStatus = (SStatusMsg *)pCont; -// -// SDnodeObj *pObj = mgmtGetDnode(htonl(pStatus->privateIp)); -// if (pObj == NULL) { -// mError("dnode:%s not exist", taosIpStr(pObj->privateIp)); -// 
mgmtSendRspToDnode(pConn, msgType + 1, TSDB_CODE_DNODE_NOT_EXIST, NULL, 0); -// return; -// } -// -// pObj->lastReboot = htonl(pStatus->lastReboot); -// pObj->numOfTotalVnodes = htons(pStatus->numOfTotalVnodes); -// pObj->openVnodes = htons(pStatus->openVnodes); -// pObj->numOfCores = htons(pStatus->numOfCores); -// pObj->diskAvailable = pStatus->diskAvailable; -// pObj->alternativeRole = pStatus->alternativeRole; -//// -//// if (mgmtProcessDnodeStatusFp) { -//// mgmtProcessDnodeStatusFp(pStatus, pObj, pConn); -//// return; -//// } -// -// pObj->status = TSDB_DN_STATUS_READY; -// -//// // wait vnode dropped -//// for (int32_t vnode = 0; vnode < pObj->numOfVnodes; ++vnode) { -//// SVnodeLoad *pVload = &(pObj->vload[vnode]); -//// if (pVload->dropStatus == TSDB_VN_DROP_STATUS_DROPPING) { -//// bool existInDnode = false; -//// for (int32_t j = 0; j < pObj->openVnodes; ++j) { -//// if (htonl(pStatus->load[j].vnode) == vnode) { -//// existInDnode = true; -//// break; -//// } -//// } -//// -//// if (!existInDnode) { -//// pVload->dropStatus = TSDB_VN_DROP_STATUS_READY; -//// pVload->status = TSDB_VN_STATUS_OFFLINE; -//// mgmtUpdateDnode(pObj); -//// mPrint("dnode:%s, vid:%d, drop finished", taosIpStr(pObj->privateIp), vnode); -//// taosTmrStart(mgmtMonitorDbDrop, 10000, NULL, tsMgmtTmr); -//// } -//// } else if (pVload->vgId == 0) { -//// /* -//// * In some cases, vnode information may be reported abnormally, recover it -//// */ -//// if (pVload->dropStatus != TSDB_VN_DROP_STATUS_READY || pVload->status != TSDB_VN_STATUS_OFFLINE) { -//// mPrint("dnode:%s, vid:%d, vgroup:%d status:%s dropStatus:%s, set it to avail status", -//// taosIpStr(pObj->privateIp), vnode, pVload->vgId, taosGetVnodeStatusStr(pVload->status), -//// taosGetVnodeDropStatusStr(pVload->dropStatus)); -//// pVload->dropStatus = TSDB_VN_DROP_STATUS_READY; -//// pVload->status = TSDB_VN_STATUS_OFFLINE; -//// mgmtUpdateDnode(pObj); -//// } -//// } -//// } -//} diff --git a/src/mnode/src/mgmtDb.c 
b/src/mnode/src/mgmtDb.c index 750aa9a1c11b9626a41f8ae7213c4852a0644f45..88934599dc237b6451d8631b36db5583c37e819e 100644 --- a/src/mnode/src/mgmtDb.c +++ b/src/mnode/src/mgmtDb.c @@ -15,35 +15,39 @@ #define _DEFAULT_SOURCE #include "os.h" -#include "taoserror.h" -#include "tschemautil.h" -#include "tstatus.h" -#include "tutil.h" -#include "mnode.h" + +#include "mgmtDb.h" #include "mgmtAcct.h" #include "mgmtBalance.h" -#include "mgmtDb.h" +#include "mgmtChildTable.h" #include "mgmtDnode.h" -#include "mgmtMnode.h" #include "mgmtGrant.h" +#include "mgmtMnode.h" +#include "mgmtNormalTable.h" #include "mgmtShell.h" +#include "mgmtSuperTable.h" #include "mgmtTable.h" #include "mgmtUser.h" #include "mgmtVgroup.h" +#include "mnode.h" -static void *tsDbSdb = NULL; +#include "taoserror.h" +#include "tstatus.h" +#include "tutil.h" +#include "name.h" + +static void *tsDbSdb = NULL; static int32_t tsDbUpdateSize; -static int32_t mgmtUpdateDb(SDbObj *pDb); -static int32_t mgmtCreateDb(SAcctObj *pAcct, SCreateDbMsg *pCreate); -static int32_t mgmtDropDbByName(SAcctObj *pAcct, char *name, short ignoreNotExists); -static int32_t mgmtDropDb(SDbObj *pDb); +static int32_t mgmtCreateDb(SAcctObj *pAcct, SCMCreateDbMsg *pCreate); +static void mgmtDropDb(void *handle, void *tmrId); +static void mgmtSetDbDirty(SDbObj *pDb); -static int32_t mgmtGetDbMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtGetDbMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mgmtRetrieveDbs(SShowObj *pShow, char *data, int32_t rows, void *pConn); -static void mgmtProcessCreateDbMsg(SRpcMsg *rpcMsg); -static void mgmtProcessAlterDbMsg(SRpcMsg *rpcMsg); -static void mgmtProcessDropDbMsg(SRpcMsg *rpcMsg); +static void mgmtProcessCreateDbMsg(SQueuedMsg *pMsg); +static void mgmtProcessAlterDbMsg(SQueuedMsg *pMsg); +static void mgmtProcessDropDbMsg(SQueuedMsg *pMsg); static void *(*mgmtDbActionFp[SDB_MAX_ACTION_TYPES])(void *row, char *str, int32_t size, int32_t *ssize); 
static void *mgmtDbActionInsert(void *row, char *str, int32_t size, int32_t *ssize); @@ -60,7 +64,7 @@ static void mgmtDbActionInit() { mgmtDbActionFp[SDB_TYPE_UPDATE] = mgmtDbActionUpdate; mgmtDbActionFp[SDB_TYPE_ENCODE] = mgmtDbActionEncode; mgmtDbActionFp[SDB_TYPE_DECODE] = mgmtDbActionDecode; - mgmtDbActionFp[SDB_TYPE_RESET] = mgmtDbActionReset; + mgmtDbActionFp[SDB_TYPE_RESET] = mgmtDbActionReset; mgmtDbActionFp[SDB_TYPE_DESTROY] = mgmtDbActionDestroy; } @@ -81,7 +85,7 @@ int32_t mgmtInitDbs() { SDbObj tObj; tsDbUpdateSize = tObj.updateEnd - (char *)&tObj; - tsDbSdb = sdbOpenTable(tsMaxDbs, tsDbUpdateSize, "db", SDB_KEYTYPE_STRING, tsMgmtDirectory, mgmtDbAction); + tsDbSdb = sdbOpenTable(tsMaxDbs, tsDbUpdateSize, "dbs", SDB_KEYTYPE_STRING, tsMnodeDir, mgmtDbAction); if (tsDbSdb == NULL) { mError("failed to init db data"); return -1; @@ -98,8 +102,6 @@ int32_t mgmtInitDbs() { pDb->numOfTables = 0; pDb->numOfVgroups = 0; pDb->numOfSuperTables = 0; - pDb->vgStatus = TSDB_VG_STATUS_READY; - pDb->vgTimer = NULL; pAcct = mgmtGetAcct(pDb->cfg.acct); if (pAcct != NULL) mgmtAddDbIntoAcct(pAcct, pDb); @@ -108,9 +110,9 @@ int32_t mgmtInitDbs() { } } - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CREATE_DB, mgmtProcessCreateDbMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_ALTER_DB, mgmtProcessAlterDbMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_DROP_DB, mgmtProcessDropDbMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_CREATE_DB, mgmtProcessCreateDbMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_ALTER_DB, mgmtProcessAlterDbMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_DROP_DB, mgmtProcessDropDbMsg); mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_DB, mgmtGetDbMeta); mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_DB, mgmtRetrieveDbs); @@ -133,7 +135,7 @@ SDbObj *mgmtGetDbByTableId(char *tableId) { return (SDbObj *)sdbGetRow(tsDbSdb, db); } -static int32_t mgmtCheckDBParams(SCreateDbMsg *pCreate) { +static int32_t mgmtCheckDBParams(SCMCreateDbMsg *pCreate) { if (pCreate->commitLog < 0 || 
pCreate->commitLog > 1) { mError("invalid db option commitLog: %d, only 0 or 1 allowed", pCreate->commitLog); return TSDB_CODE_INVALID_OPTION; @@ -206,7 +208,7 @@ static int32_t mgmtCheckDBParams(SCreateDbMsg *pCreate) { return TSDB_CODE_SUCCESS; } -static int32_t mgmtCheckDbParams(SCreateDbMsg *pCreate) { +static int32_t mgmtCheckDbParams(SCMCreateDbMsg *pCreate) { // assign default parameters if (pCreate->maxSessions < 0) pCreate->maxSessions = tsSessionsPerVnode; // if (pCreate->cacheBlockSize < 0) pCreate->cacheBlockSize = tsCacheBlockSize; // @@ -251,7 +253,7 @@ static int32_t mgmtCheckDbParams(SCreateDbMsg *pCreate) { return TSDB_CODE_SUCCESS; } -static int32_t mgmtCreateDb(SAcctObj *pAcct, SCreateDbMsg *pCreate) { +static int32_t mgmtCreateDb(SAcctObj *pAcct, SCMCreateDbMsg *pCreate) { int32_t numOfDbs = sdbGetNumOfRows(tsDbSdb); if (numOfDbs >= tsMaxDbs) { mWarn("numOfDbs:%d, exceed tsMaxDbs:%d", numOfDbs, tsMaxDbs); @@ -293,133 +295,6 @@ static int32_t mgmtCreateDb(SAcctObj *pAcct, SCreateDbMsg *pCreate) { return code; } -static int32_t mgmtUpdateDb(SDbObj *pDb) { - return sdbUpdateRow(tsDbSdb, pDb, tsDbUpdateSize, 1); -} - -static int32_t mgmtSetDbDropping(SDbObj *pDb) { - if (pDb->dropStatus == TSDB_DB_STATUS_DROP_FROM_SDB) return 0; - - SVgObj *pVgroup = pDb->pHead; - while (pVgroup != NULL) { - for (int32_t i = 0; i < pVgroup->numOfVnodes; i++) { - SVnodeGid *pVnodeGid = pVgroup->vnodeGid + i; - SDnodeObj *pDnode = mgmtGetDnode(pVnodeGid->ip); - if (pDnode == NULL) continue; - - SVnodeLoad *pVload = &pDnode->vload[pVnodeGid->vnode]; - if (pVload->dropStatus != TSDB_VN_DROP_STATUS_DROPPING) { - pVload->dropStatus = TSDB_VN_DROP_STATUS_DROPPING; - - mPrint("dnode:%s vnode:%d db:%s set to dropping status", taosIpStr(pDnode->privateIp), pVnodeGid->vnode, pDb->name); - if (mgmtUpdateDnode(pDnode) < 0) { - mError("db:%s drop failed, dnode sdb update error", pDb->name); - return TSDB_CODE_SDB_ERROR; - } - } - } - - //void mgmtSendDropVgroupMsg(SVgObj 
*pVgroup, void *ahandle) { - // mTrace("vgroup:%d send free vgroup msg, ahandle:%p", pVgroup->vgId, ahandle); - // - // for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { - // SRpcIpSet ipSet = mgmtGetIpSetFromIp(pVgroup->vnodeGid[i].ip); - // mgmtSendDropVnodeMsg(pVgroup->vnodeGid[i].vnode, &ipSet, ahandle); - // } - //} - // -// mgmtSendDropVgroupMsg(pVgroup); - pVgroup = pVgroup->next; - } - - if (pDb->dropStatus == TSDB_DB_STATUS_DROPPING) return 0; - - pDb->dropStatus = TSDB_DB_STATUS_DROPPING; - if (mgmtUpdateDb(pDb) < 0) { - mError("db:%s drop failed, db sdb update error", pDb->name); - return TSDB_CODE_SDB_ERROR; - } - - mPrint("db:%s set to dropping status", pDb->name); - return 0; -} - -static bool mgmtCheckDropDbFinished(SDbObj *pDb) { - SVgObj *pVgroup = pDb->pHead; - while (pVgroup) { - for (int32_t i = 0; i < pVgroup->numOfVnodes; i++) { - SVnodeGid *pVnodeGid = pVgroup->vnodeGid + i; - SDnodeObj *pDnode = mgmtGetDnode(pVnodeGid->ip); - - if (pDnode == NULL) continue; - if (pDnode->status == TSDB_DN_STATUS_OFFLINE) continue; - - SVnodeLoad *pVload = &pDnode->vload[pVnodeGid->vnode]; - if (pVload->dropStatus == TSDB_VN_DROP_STATUS_DROPPING) { - mTrace("dnode:%s, vnode:%d db:%s wait dropping", taosIpStr(pDnode->privateIp), pVnodeGid->vnode, pDb->name); - return false; - } - } - pVgroup = pVgroup->next; - } - - mPrint("db:%s all vnodes drop finished", pDb->name); - return true; -} - -static void mgmtDropDbFromSdb(SDbObj *pDb) { - while (pDb->pHead) mgmtDropVgroup(pDb, pDb->pHead); - -// SSuperTableObj *pMetric = pDb->pSTable; -// while (pMetric) { -// SSuperTableObj *pNext = pMetric->next; -// mgmtDropTable(pDb, pMetric->tableId, 0); -// pMetric = pNext; -// } - - mPrint("db:%s all meters drop finished", pDb->name); - sdbDeleteRow(tsDbSdb, pDb); - mPrint("db:%s database drop finished", pDb->name); -} - -static int32_t mgmtDropDb(SDbObj *pDb) { - if (pDb->dropStatus == TSDB_DB_STATUS_DROPPING) { - bool finished = mgmtCheckDropDbFinished(pDb); - if 
(!finished) { - SVgObj *pVgroup = pDb->pHead; - while (pVgroup != NULL) { - //mgmtSendDropVgroupMsg(pVgroup, NULL); - pVgroup = pVgroup->next; - } - return TSDB_CODE_ACTION_IN_PROGRESS; - } - - // don't sync this action - pDb->dropStatus = TSDB_DB_STATUS_DROP_FROM_SDB; - mgmtDropDbFromSdb(pDb); - return 0; - } else { - int32_t code = mgmtSetDbDropping(pDb); - if (code != 0) return code; - return TSDB_CODE_ACTION_IN_PROGRESS; - } -} - -static int32_t mgmtDropDbByName(SAcctObj *pAcct, char *name, short ignoreNotExists) { - SDbObj *pDb = (SDbObj *)sdbGetRow(tsDbSdb, name); - if (pDb == NULL) { - if (ignoreNotExists) return TSDB_CODE_SUCCESS; - mWarn("db:%s is not there", name); - return TSDB_CODE_INVALID_DB; - } - - if (mgmtCheckIsMonitorDB(pDb->name, tsMonitorDbName)) { - return TSDB_CODE_MONITOR_DB_FORBIDDEN; - } - - return mgmtDropDb(pDb); -} - bool mgmtCheckIsMonitorDB(char *db, char *monitordb) { char dbName[TSDB_DB_NAME_LEN + 1] = {0}; extractDBName(db, dbName); @@ -428,21 +303,7 @@ bool mgmtCheckIsMonitorDB(char *db, char *monitordb) { return (strncasecmp(dbName, monitordb, len) == 0 && len == strlen(monitordb)); } -UNUSED_FUNC -static void mgmtMonitorDbDrop(void *unused, void *unusedt) { - void * pNode = NULL; - SDbObj *pDb = NULL; - - while (1) { - pNode = sdbFetchRow(tsDbSdb, pNode, (void **)&pDb); - if (pDb == NULL) break; - if (pDb->dropStatus != TSDB_DB_STATUS_DROPPING) continue; - mgmtDropDb(pDb); - break; - } -} - -static int32_t mgmtAlterDb(SAcctObj *pAcct, SAlterDbMsg *pAlter) { +static int32_t mgmtAlterDb(SAcctObj *pAcct, SCMAlterDbMsg *pAlter) { return 0; // int32_t code = TSDB_CODE_SUCCESS; // @@ -562,10 +423,10 @@ void mgmtCleanUpDbs() { sdbCloseTable(tsDbSdb); } -static int32_t mgmtGetDbMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +static int32_t mgmtGetDbMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { int32_t cols = 0; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; SUserObj *pUser = 
mgmtGetUserFromConn(pConn); if (pUser == NULL) return 0; @@ -577,7 +438,7 @@ static int32_t mgmtGetDbMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { pShow->bytes[cols] = 8; pSchema[cols].type = TSDB_DATA_TYPE_TIMESTAMP; - strcpy(pSchema[cols].name, "created time"); + strcpy(pSchema[cols].name, "created_time"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; @@ -820,7 +681,7 @@ static int32_t mgmtRetrieveDbs(SShowObj *pShow, char *data, int32_t rows, void * cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - strcpy(pWrite, pDb->dropStatus != TSDB_DB_STATUS_READY ? "dropping" : "ready"); + strcpy(pWrite, pDb->dirty != TSDB_DB_STATUS_READY ? "dropping" : "ready"); cols++; numOfRows++; @@ -838,7 +699,6 @@ void *mgmtDbActionInsert(void *row, char *str, int32_t size, int32_t *ssize) { pDb->pTail = NULL; pDb->numOfVgroups = 0; pDb->numOfTables = 0; - pDb->vgTimer = NULL; mgmtAddDbIntoAcct(pAcct, pDb); return NULL; @@ -849,6 +709,10 @@ void *mgmtDbActionDelete(void *row, char *str, int32_t size, int32_t *ssize) { SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct); mgmtRemoveDbFromAcct(pAcct, pDb); + mgmtDropAllNormalTables(pDb); + mgmtDropAllChildTables(pDb); + mgmtDropAllSuperTables(pDb); + return NULL; } @@ -904,19 +768,14 @@ void mgmtRemoveTableFromDb(SDbObj *pDb) { atomic_add_fetch_32(&pDb->numOfTables, -1); } -static void mgmtProcessCreateDbMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; - - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); - return; - } +static void mgmtSetDbDirty(SDbObj *pDb) { + pDb->dirty = true; +} - SCreateDbMsg *pCreate = (SCreateDbMsg *) rpcMsg->pCont; +static void mgmtProcessCreateDbMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; + SCMCreateDbMsg *pCreate = 
pMsg->pCont; pCreate->maxSessions = htonl(pCreate->maxSessions); pCreate->cacheBlockSize = htonl(pCreate->cacheBlockSize); pCreate->daysPerFile = htonl(pCreate->daysPerFile); @@ -926,71 +785,113 @@ static void mgmtProcessCreateDbMsg(SRpcMsg *rpcMsg) { pCreate->commitTime = htonl(pCreate->commitTime); pCreate->blocksPerTable = htons(pCreate->blocksPerTable); pCreate->rowsInFileBlock = htonl(pCreate->rowsInFileBlock); - // pCreate->cacheNumOfBlocks = htonl(pCreate->cacheNumOfBlocks); + int32_t code; if (mgmtCheckExpired()) { - rpcRsp.code = TSDB_CODE_GRANT_EXPIRED; - } else if (!pUser->writeAuth) { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; + code = TSDB_CODE_GRANT_EXPIRED; + } else if (!pMsg->pUser->writeAuth) { + code = TSDB_CODE_NO_RIGHTS; } else { - rpcRsp.code = mgmtCreateDb(pUser->pAcct, pCreate); - if (rpcRsp.code == TSDB_CODE_SUCCESS) { - mLPrint("DB:%s is created by %s", pCreate->db, pUser->user); + code = mgmtCreateDb(pMsg->pUser->pAcct, pCreate); + if (code == TSDB_CODE_SUCCESS) { + mLPrint("DB:%s is created by %s", pCreate->db, pMsg->pUser->user); } } - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, code); } -static void mgmtProcessAlterDbMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; - - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); - return; - } +static void mgmtProcessAlterDbMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; - SAlterDbMsg *pAlter = (SAlterDbMsg *) rpcMsg->pCont; + SCMAlterDbMsg *pAlter = pMsg->pCont; pAlter->daysPerFile = htonl(pAlter->daysPerFile); pAlter->daysToKeep = htonl(pAlter->daysToKeep); pAlter->maxSessions = htonl(pAlter->maxSessions) + 1; - if (!pUser->writeAuth) { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; + int32_t code; + if (!pMsg->pUser->writeAuth) { + code = 
TSDB_CODE_NO_RIGHTS; } else { - rpcRsp.code = mgmtAlterDb(pUser->pAcct, pAlter); - if (rpcRsp.code == TSDB_CODE_SUCCESS) { - mLPrint("DB:%s is altered by %s", pAlter->db, pUser->user); + code = mgmtAlterDb(pMsg->pUser->pAcct, pAlter); + if (code == TSDB_CODE_SUCCESS) { + mLPrint("DB:%s is altered by %s", pAlter->db, pMsg->pUser->user); } } - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, code); +} + +static void mgmtDropDb(void *handle, void *tmrId) { + SQueuedMsg *newMsg = handle; + SDbObj *pDb = newMsg->ahandle; + mPrint("db:%s, drop db from sdb", pDb->name); + + int32_t code = sdbDeleteRow(tsDbSdb, pDb); + if (code != 0) { + code = TSDB_CODE_SDB_ERROR; + } + + mgmtSendSimpleResp(newMsg->thandle, code); + rpcFreeCont(newMsg->pCont); + free(newMsg); } +static void mgmtProcessDropDbMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; -static void mgmtProcessDropDbMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; + SCMDropDbMsg *pDrop = pMsg->pCont; + mTrace("db:%s, drop db msg is received from thandle:%p", pDrop->db, pMsg->thandle); - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); - return ; + if (mgmtCheckExpired()) { + mError("db:%s, failed to drop, grant expired", pDrop->db); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_GRANT_EXPIRED); + return; + } + + if (!pMsg->pUser->writeAuth) { + mError("db:%s, failed to drop, no rights", pDrop->db); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_NO_RIGHTS); + return; } - if (pUser->superAuth) { - SDropDbMsg *pDrop = rpcMsg->pCont; - rpcRsp.code = mgmtDropDbByName(pUser->pAcct, pDrop->db, pDrop->ignoreNotExists); - if (rpcRsp.code == TSDB_CODE_SUCCESS) { - mLPrint("DB:%s is dropped by %s", pDrop->db, pUser->user); + SDbObj *pDb = mgmtGetDb(pDrop->db); + if (pDb == NULL) { 
+ if (pDrop->ignoreNotExists) { + mTrace("db:%s, db is not exist, think drop success", pDrop->db); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_SUCCESS); + return; + } else { + mError("db:%s, failed to drop, invalid db", pDrop->db); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_DB); + return; } - } else { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; } - rpcSendResponse(&rpcRsp); + if (mgmtCheckIsMonitorDB(pDb->name, tsMonitorDbName)) { + mError("db:%s, can't drop monitor database", pDrop->db); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_MONITOR_DB_FORBIDDEN); + return; + } + + mgmtSetDbDirty(pDb); + + SQueuedMsg *newMsg = malloc(sizeof(SQueuedMsg)); + memcpy(newMsg, pMsg, sizeof(SQueuedMsg)); + pMsg->pCont = NULL; + + SVgObj *pVgroup = pDb->pHead; + if (pVgroup != NULL) { + mPrint("vgroup:%d, will be dropped", pVgroup->vgId); + newMsg->ahandle = pVgroup; + newMsg->expected = pVgroup->numOfVnodes; + mgmtDropVgroup(pVgroup, newMsg); + return; + } + + mTrace("db:%s, all vgroups is dropped", pDb->name); + + void *tmpTmr; + newMsg->ahandle = pDb; + taosTmrReset(mgmtDropDb, 10, newMsg, tsMgmtTmr, &tmpTmr); } diff --git a/src/mnode/src/mgmtDnode.c b/src/mnode/src/mgmtDnode.c index 576b7ebf6ef6088b79ecd7db99397faa4519cce1..cf57b43918fb5fd89665d6aee770007a0768a1d5 100644 --- a/src/mnode/src/mgmtDnode.c +++ b/src/mnode/src/mgmtDnode.c @@ -16,19 +16,20 @@ #define _DEFAULT_SOURCE #include "os.h" #include "tmodule.h" -#include "tschemautil.h" #include "tstatus.h" #include "mgmtBalance.h" #include "mgmtDnode.h" #include "mgmtDClient.h" #include "mgmtMnode.h" #include "mgmtShell.h" +#include "mgmtDServer.h" #include "mgmtUser.h" #include "mgmtVgroup.h" int32_t (*mgmtInitDnodesFp)() = NULL; void (*mgmtCleanUpDnodesFp)() = NULL; SDnodeObj *(*mgmtGetDnodeFp)(uint32_t ip) = NULL; +SDnodeObj *(*mgmtGetDnodeByIpFp)(int32_t dnodeId) = NULL; int32_t (*mgmtGetDnodesNumFp)() = NULL; int32_t (*mgmtUpdateDnodeFp)(SDnodeObj *pDnode) = NULL; void * (*mgmtGetNextDnodeFp)(SShowObj *pShow, 
SDnodeObj **pDnode) = NULL; @@ -37,93 +38,42 @@ void (*mgmtSetDnodeUnRemoveFp)(SDnodeObj *pDnode) = NULL; static SDnodeObj tsDnodeObj = {0}; static void * mgmtGetNextDnode(SShowObj *pShow, SDnodeObj **pDnode); static bool mgmtCheckConfigShow(SGlobalConfig *cfg); -static int32_t mgmtGetModuleMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtGetModuleMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mgmtRetrieveModules(SShowObj *pShow, char *data, int32_t rows, void *pConn); -static int32_t mgmtGetConfigMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtGetConfigMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mgmtRetrieveConfigs(SShowObj *pShow, char *data, int32_t rows, void *pConn); -static int32_t mgmtGetVnodeMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtGetVnodeMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mgmtRetrieveVnodes(SShowObj *pShow, char *data, int32_t rows, void *pConn); -static void mgmtProcessCfgDnodeMsg(SRpcMsg *rpcMsg); +static void mgmtProcessCfgDnodeMsg(SQueuedMsg *pMsg); +static void mgmtProcessCfgDnodeMsgRsp(SRpcMsg *rpcMsg) ; +static void mgmtProcessDnodeStatusMsg(SRpcMsg *rpcMsg); void mgmtSetDnodeMaxVnodes(SDnodeObj *pDnode) { int32_t maxVnodes = pDnode->numOfCores * tsNumOfVnodesPerCore; + maxVnodes = maxVnodes > TSDB_MAX_VNODES ? TSDB_MAX_VNODES : maxVnodes; maxVnodes = maxVnodes < TSDB_MIN_VNODES ? 
TSDB_MIN_VNODES : maxVnodes; - if (pDnode->numOfTotalVnodes != 0) { - maxVnodes = pDnode->numOfTotalVnodes; + + if (pDnode->numOfTotalVnodes == 0) { + pDnode->numOfTotalVnodes = maxVnodes; } + if (pDnode->alternativeRole == TSDB_DNODE_ROLE_MGMT) { - maxVnodes = 0; + pDnode->numOfTotalVnodes = 0; } - pDnode->numOfVnodes = maxVnodes; - pDnode->numOfFreeVnodes = maxVnodes; pDnode->openVnodes = 0; pDnode->status = TSDB_DN_STATUS_OFFLINE; -} - -void mgmtCalcNumOfFreeVnodes(SDnodeObj *pDnode) { - int32_t totalVnodes = 0; - - mTrace("dnode:%s, begin calc free vnodes", taosIpStr(pDnode->privateIp)); - for (int32_t i = 0; i < pDnode->numOfVnodes; ++i) { - SVnodeLoad *pVload = pDnode->vload + i; - if (pVload->vgId != 0) { - mTrace("%d-dnode:%s, calc free vnodes, exist vnode:%d, vgroup:%d, state:%d %s, dropstate:%d %s, syncstatus:%d %s", - totalVnodes, taosIpStr(pDnode->privateIp), i, pVload->vgId, - pVload->status, taosGetVnodeStatusStr(pVload->status), - pVload->dropStatus, taosGetVnodeDropStatusStr(pVload->dropStatus), - pVload->syncStatus, taosGetVnodeSyncStatusStr(pVload->syncStatus)); - totalVnodes++; - } - } - pDnode->numOfFreeVnodes = pDnode->numOfVnodes - totalVnodes; - mTrace("dnode:%s, numOfVnodes:%d, numOfFreeVnodes:%d, totalVnodes:%d", - taosIpStr(pDnode->privateIp), pDnode->numOfVnodes, pDnode->numOfFreeVnodes, totalVnodes); + mgmtUpdateDnode(pDnode); } -void mgmtSetDnodeVgid(SVnodeGid vnodeGid[], int32_t numOfVnodes, int32_t vgId) { - SDnodeObj *pDnode; - - for (int32_t i = 0; i < numOfVnodes; ++i) { - pDnode = mgmtGetDnode(vnodeGid[i].ip); - if (pDnode) { - SVnodeLoad *pVload = pDnode->vload + vnodeGid[i].vnode; - memset(pVload, 0, sizeof(SVnodeLoad)); - pVload->vnode = vnodeGid[i].vnode; - pVload->vgId = vgId; - mTrace("dnode:%s, vnode:%d add to vgroup:%d", taosIpStr(vnodeGid[i].ip), vnodeGid[i].vnode, pVload->vgId); - mgmtCalcNumOfFreeVnodes(pDnode); - } else { - mError("dnode:%s, not in dnode DB!!!", taosIpStr(vnodeGid[i].ip)); - } - } -} - -void 
mgmtUnSetDnodeVgid(SVnodeGid vnodeGid[], int32_t numOfVnodes) { - SDnodeObj *pDnode; - - for (int32_t i = 0; i < numOfVnodes; ++i) { - pDnode = mgmtGetDnode(vnodeGid[i].ip); - if (pDnode) { - SVnodeLoad *pVload = pDnode->vload + vnodeGid[i].vnode; - mTrace("dnode:%s, vnode:%d remove from vgroup:%d", taosIpStr(vnodeGid[i].ip), vnodeGid[i].vnode, pVload->vgId); - memset(pVload, 0, sizeof(SVnodeLoad)); - mgmtCalcNumOfFreeVnodes(pDnode); - } else { - mError("dnode:%s not in dnode DB!!!", taosIpStr(vnodeGid[i].ip)); - } - } -} - - bool mgmtCheckModuleInDnode(SDnodeObj *pDnode, int32_t moduleType) { uint32_t status = pDnode->moduleStatus & (1 << moduleType); return status > 0; } -int32_t mgmtGetModuleMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +int32_t mgmtGetModuleMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { int32_t cols = 0; SUserObj *pUser = mgmtGetUserFromConn(pConn); @@ -131,7 +81,7 @@ int32_t mgmtGetModuleMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { if (strcmp(pUser->user, "root") != 0) return TSDB_CODE_NO_RIGHTS; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = 16; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; @@ -216,7 +166,7 @@ int32_t mgmtRetrieveModules(SShowObj *pShow, char *data, int32_t rows, void *pCo return numOfRows; } -static int32_t mgmtGetConfigMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +static int32_t mgmtGetConfigMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { int32_t cols = 0; SUserObj *pUser = mgmtGetUserFromConn(pConn); @@ -224,7 +174,7 @@ static int32_t mgmtGetConfigMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn if (strcmp(pUser->user, "root") != 0) return TSDB_CODE_NO_RIGHTS; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = TSDB_CFG_OPTION_LEN; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; @@ -304,13 +254,13 @@ static int32_t mgmtRetrieveConfigs(SShowObj *pShow, char *data, int32_t 
rows, vo return numOfRows; } -static int32_t mgmtGetVnodeMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +static int32_t mgmtGetVnodeMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { int32_t cols = 0; SUserObj *pUser = mgmtGetUserFromConn(pConn); if (pUser == NULL) return 0; if (strcmp(pUser->user, "root") != 0) return TSDB_CODE_NO_RIGHTS; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = 4; pSchema[cols].type = TSDB_DATA_TYPE_INT; @@ -318,12 +268,6 @@ static int32_t mgmtGetVnodeMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; - pShow->bytes[cols] = 4; - pSchema[cols].type = TSDB_DATA_TYPE_INT; - strcpy(pSchema[cols].name, "vgid"); - pSchema[cols].bytes = htons(pShow->bytes[cols]); - cols++; - pShow->bytes[cols] = 12; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; strcpy(pSchema[cols].name, "status"); @@ -332,7 +276,7 @@ static int32_t mgmtGetVnodeMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) pShow->bytes[cols] = 12; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; - strcpy(pSchema[cols].name, "sync status"); + strcpy(pSchema[cols].name, "sync_status"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; @@ -342,11 +286,10 @@ static int32_t mgmtGetVnodeMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) pShow->offset[0] = 0; for (int32_t i = 1; i < cols; ++i) pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1]; - // TODO: if other thread drop dnode ???? 
SDnodeObj *pDnode = NULL; if (pShow->payloadLen > 0 ) { uint32_t ip = ip2uint(pShow->payload); - pDnode = mgmtGetDnode(ip); + pDnode = mgmtGetDnodeByIp(ip); if (NULL == pDnode) { return TSDB_CODE_NODE_OFFLINE; } @@ -399,10 +342,6 @@ static int32_t mgmtRetrieveVnodes(SShowObj *pShow, char *data, int32_t rows, voi cols = 0; - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - *(uint32_t *)pWrite = pVnode->vnode; - cols++; - pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; *(uint32_t *)pWrite = pVnode->vgId; cols++; @@ -435,19 +374,22 @@ int32_t mgmtInitDnodes() { mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_VNODES, mgmtGetVnodeMeta); mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_VNODES, mgmtRetrieveVnodes); mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_CONFIG_DNODE, mgmtProcessCfgDnodeMsg); + mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_CONFIG_DNODE_RSP, mgmtProcessCfgDnodeMsgRsp); + mgmtAddDServerMsgHandle(TSDB_MSG_TYPE_DM_STATUS, mgmtProcessDnodeStatusMsg); if (mgmtInitDnodesFp) { return mgmtInitDnodesFp(); } else { - tsDnodeObj.privateIp = inet_addr(tsPrivateIp);; + tsDnodeObj.dnodeId = 1; + tsDnodeObj.privateIp = inet_addr(tsPrivateIp); + tsDnodeObj.publicIp = inet_addr(tsPublicIp); tsDnodeObj.createdTime = taosGetTimestampMs(); - tsDnodeObj.lastReboot = taosGetTimestampSec(); + tsDnodeObj.numOfTotalVnodes = tsNumOfTotalVnodes; tsDnodeObj.numOfCores = (uint16_t) tsNumOfCores; - tsDnodeObj.status = TSDB_DN_STATUS_READY; tsDnodeObj.alternativeRole = TSDB_DNODE_ROLE_ANY; - tsDnodeObj.numOfTotalVnodes = tsNumOfTotalVnodes; - tsDnodeObj.thandle = (void *) (1); //hack way - tsDnodeObj.status = TSDB_DN_STATUS_READY; + tsDnodeObj.status = TSDB_DN_STATUS_OFFLINE; + tsDnodeObj.lastReboot = taosGetTimestampSec(); + sprintf(tsDnodeObj.dnodeName, "%d", tsDnodeObj.dnodeId); mgmtSetDnodeMaxVnodes(&tsDnodeObj); tsDnodeObj.moduleStatus |= (1 << TSDB_MOD_MGMT); @@ -463,21 +405,30 @@ int32_t mgmtInitDnodes() { void mgmtCleanUpDnodes() { 
if (mgmtCleanUpDnodesFp) { - mgmtCleanUpDnodesFp(); + (*mgmtCleanUpDnodesFp)(); } } -SDnodeObj *mgmtGetDnode(uint32_t ip) { +SDnodeObj *mgmtGetDnode(int32_t dnodeId) { if (mgmtGetDnodeFp) { - return mgmtGetDnodeFp(ip); - } else { + return (*mgmtGetDnodeFp)(dnodeId); + } + if (dnodeId == 1) { return &tsDnodeObj; } + return NULL; +} + +SDnodeObj *mgmtGetDnodeByIp(uint32_t ip) { + if (mgmtGetDnodeByIpFp) { + return (*mgmtGetDnodeByIpFp)(ip); + } + return &tsDnodeObj; } int32_t mgmtGetDnodesNum() { if (mgmtGetDnodesNumFp) { - return mgmtGetDnodesNumFp(); + return (*mgmtGetDnodesNumFp)(); } else { return 1; } @@ -485,7 +436,7 @@ int32_t mgmtGetDnodesNum() { int32_t mgmtUpdateDnode(SDnodeObj *pDnode) { if (mgmtUpdateDnodeFp) { - return mgmtUpdateDnodeFp(pDnode); + return (*mgmtUpdateDnodeFp)(pDnode); } else { return 0; } @@ -493,7 +444,7 @@ int32_t mgmtUpdateDnode(SDnodeObj *pDnode) { void *mgmtGetNextDnode(SShowObj *pShow, SDnodeObj **pDnode) { if (mgmtGetNextDnodeFp) { - return mgmtGetNextDnodeFp(pShow, pDnode); + return (*mgmtGetNextDnodeFp)(pShow, pDnode); } else { if (*pDnode == NULL) { *pDnode = &tsDnodeObj; @@ -507,14 +458,12 @@ void *mgmtGetNextDnode(SShowObj *pShow, SDnodeObj **pDnode) { void mgmtSetDnodeUnRemove(SDnodeObj *pDnode) { if (mgmtSetDnodeUnRemoveFp) { - mgmtSetDnodeUnRemoveFp(pDnode); + (*mgmtSetDnodeUnRemoveFp)(pDnode); } } bool mgmtCheckConfigShow(SGlobalConfig *cfg) { - if (cfg->cfgType & TSDB_CFG_CTYPE_B_CLUSTER) - return false; - if (cfg->cfgType & TSDB_CFG_CTYPE_B_NOT_PRINT) + if (!(cfg->cfgType & TSDB_CFG_CTYPE_B_SHOW)) return false; return true; } @@ -527,21 +476,19 @@ bool mgmtCheckDnodeInOfflineState(SDnodeObj *pDnode) { return pDnode->status == TSDB_DN_STATUS_OFFLINE; } -void mgmtProcessCfgDnodeMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; +void mgmtProcessCfgDnodeMsg(SQueuedMsg *pMsg) { + SRpcMsg rpcRsp = {.handle 
= pMsg->thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + if (mgmtCheckRedirect(pMsg->thandle)) return; - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); - return; + SCMCfgDnodeMsg *pCmCfgDnode = pMsg->pCont; + if (pCmCfgDnode->ip[0] == 0) { + strcpy(pCmCfgDnode->ip, tsPrivateIp); + } else { + strcpy(pCmCfgDnode->ip, pCmCfgDnode->ip); } - - SCMCfgDnodeMsg *pCmCfgDnode = (SCMCfgDnodeMsg *) rpcMsg->pCont; uint32_t dnodeIp = inet_addr(pCmCfgDnode->ip); - if (strcmp(pUser->pAcct->user, "root") != 0) { + if (strcmp(pMsg->pUser->pAcct->user, "root") != 0) { rpcRsp.code = TSDB_CODE_NO_RIGHTS; } else { SRpcIpSet ipSet = mgmtGetIpSetFromIp(dnodeIp); @@ -560,8 +507,115 @@ void mgmtProcessCfgDnodeMsg(SRpcMsg *rpcMsg) { } if (rpcRsp.code == TSDB_CODE_SUCCESS) { - mTrace("dnode:%s is configured by %s", pCmCfgDnode->ip, pUser->user); + mTrace("dnode:%s is configured by %s", pCmCfgDnode->ip, pMsg->pUser->user); + } + + rpcSendResponse(&rpcRsp); +} + +static void mgmtProcessCfgDnodeMsgRsp(SRpcMsg *rpcMsg) { + mTrace("cfg vnode rsp is received"); +} + +void mgmtProcessDnodeStatusMsg(SRpcMsg *rpcMsg) { + if (mgmtCheckRedirect(rpcMsg->handle)) return; + + SDMStatusMsg *pStatus = rpcMsg->pCont; + pStatus->dnodeId = htonl(pStatus->dnodeId); + + SDnodeObj *pDnode = NULL; + if (pStatus->dnodeId == 0) { + pDnode = mgmtGetDnodeByIp(htonl(pStatus->privateIp)); + if (pDnode == NULL) { + mTrace("dnode not created, privateIp:%s", taosIpStr(htonl(pStatus->privateIp))); + mgmtSendSimpleResp(rpcMsg->handle, TSDB_CODE_DNODE_NOT_EXIST); + return; + } + } else { + pDnode = mgmtGetDnode(pStatus->dnodeId); + if (pDnode == NULL) { + mError("dnode:%d, not exist, privateIp:%s", taosIpStr(pStatus->dnodeId), pStatus->dnodeName); + mgmtSendSimpleResp(rpcMsg->handle, TSDB_CODE_DNODE_NOT_EXIST); + return; + } + } + + uint32_t version = htonl(pStatus->version); + if (version != tsVersion) { + 
mError("dnode:%d, status msg version:%d not equal with mnode:%d", pDnode->dnodeId, version, tsVersion); + mgmtSendSimpleResp(rpcMsg->handle, TSDB_CODE_INVALID_MSG_VERSION); + return ; } + + uint32_t lastPrivateIp = pDnode->privateIp; + uint32_t lastPublicIp = pDnode->publicIp; + + pDnode->privateIp = htonl(pStatus->privateIp); + pDnode->publicIp = htonl(pStatus->publicIp); + pDnode->lastReboot = htonl(pStatus->lastReboot); + pDnode->numOfCores = htons(pStatus->numOfCores); + pDnode->diskAvailable = pStatus->diskAvailable; + pDnode->alternativeRole = pStatus->alternativeRole; + + if (pDnode->numOfTotalVnodes == 0) { + pDnode->numOfTotalVnodes = htons(pStatus->numOfTotalVnodes); + } + + if (pStatus->dnodeId == 0) { + mTrace("dnode:%d, first access, privateIp:%s, name:%s, ", pDnode->dnodeId, taosIpStr(pDnode->privateIp), pDnode->dnodeName); + mgmtSetDnodeMaxVnodes(pDnode); + } + + if (lastPrivateIp != pDnode->privateIp || lastPublicIp != pDnode->publicIp) { + mgmtUpdateVgroupIp(pDnode); + //mgmtUpdateMnodeIp(); + } + + int32_t openVnodes = htons(pStatus->openVnodes); + for (int32_t j = 0; j < openVnodes; ++j) { + pDnode->vload[j].vgId = htonl(pStatus->load[j].vgId); + pDnode->vload[j].totalStorage = htobe64(pStatus->load[j].totalStorage); + pDnode->vload[j].compStorage = htobe64(pStatus->load[j].compStorage); + pDnode->vload[j].pointsWritten = htobe64(pStatus->load[j].pointsWritten); + + SVgObj *pVgroup = mgmtGetVgroup(pDnode->vload[j].vgId); + if (pVgroup == NULL) { + SRpcIpSet ipSet = mgmtGetIpSetFromIp(pDnode->privateIp); + mPrint("dnode:%d, vnode:%d not exist in mnode, drop it", pDnode->dnodeId, pDnode->vload[j].vgId); + mgmtSendDropVnodeMsg(pDnode->vload[j].vgId, &ipSet, NULL); + } + } + + if (pDnode->status != TSDB_DN_STATUS_READY) { + mTrace("dnode:%d, from offline to online", pDnode->dnodeId); + pDnode->status = TSDB_DN_STATUS_READY; + mgmtStartBalanceTimer(200); + } + + int32_t contLen = sizeof(SDMStatusRsp) + TSDB_MAX_VNODES * sizeof(SVnodeAccess); + 
SDMStatusRsp *pRsp = rpcMallocCont(contLen); + if (pRsp == NULL) { + mgmtSendSimpleResp(rpcMsg->handle, TSDB_CODE_SERV_OUT_OF_MEMORY); + return; + } + + mgmtGetMnodeIpList(&pRsp->ipList); + + pRsp->dnodeState.dnodeId = htonl(pDnode->dnodeId); + pRsp->dnodeState.moduleStatus = htonl(pDnode->moduleStatus); + pRsp->dnodeState.createdTime = htonl(pDnode->createdTime / 1000); + pRsp->dnodeState.numOfVnodes = 0; + + contLen = sizeof(SDMStatusRsp); + + //TODO: set vnode access + + SRpcMsg rpcRsp = { + .handle = rpcMsg->handle, + .code = TSDB_CODE_SUCCESS, + .pCont = pRsp, + .contLen = contLen + }; rpcSendResponse(&rpcRsp); } diff --git a/src/mnode/src/mgmtSystem.c b/src/mnode/src/mgmtMain.c similarity index 78% rename from src/mnode/src/mgmtSystem.c rename to src/mnode/src/mgmtMain.c index 96e0abcb700b9d1cb529ee91093b7fb68b9d5aee..66200e5a1407aa59e1e2a76f736c23eb0f82a7b3 100644 --- a/src/mnode/src/mgmtSystem.c +++ b/src/mnode/src/mgmtMain.c @@ -27,41 +27,27 @@ #include "mgmtDServer.h" #include "mgmtVgroup.h" #include "mgmtUser.h" -#include "mgmtSystem.h" #include "mgmtTable.h" #include "mgmtShell.h" -char tsMgmtDirectory[128] = {0}; -void *tsMgmtTmr = NULL; -void *tsMgmtTranQhandle = NULL; +static int32_t mgmtCheckMgmtRunning(); +void *tsMgmtTmr = NULL; - -void mgmtCleanUpSystem() { - mPrint("starting to clean up mgmt"); - - sdbCleanUpPeers(); - mgmtCleanupBalance(); - mgmtCleanupDClient(); - mgmtCleanupDServer(); - mgmtCleanUpShell(); - mgmtCleanUpTables(); - mgmtCleanUpVgroups(); - mgmtCleanUpDbs(); - mgmtCleanUpDnodes(); - mgmtCleanUpUsers(); - mgmtCleanUpAccts(); - taosTmrCleanUp(tsMgmtTmr); - taosCleanUpScheduler(tsMgmtTranQhandle); - - mPrint("mgmt is cleaned up"); -} - -int32_t mgmtCheckMgmtRunning() { - if (tsModuleStatus & (1 << TSDB_MOD_MGMT)) { +int32_t mgmtInitSystem() { + if (mgmtInitShell() != 0) { + mError("failed to init shell"); return -1; } - tsetModuleStatus(TSDB_MOD_MGMT); + struct stat dirstat; + bool fileExist = (stat(tsMnodeDir, &dirstat) == 0); + 
bool asMaster = (strcmp(tsMasterIp, tsPrivateIp) == 0); + + if (asMaster || fileExist) { + if (mgmtStartSystem() != 0) { + return -1; + } + } return 0; } @@ -70,8 +56,8 @@ int32_t mgmtStartSystem() { mPrint("starting to initialize TDengine mgmt ..."); struct stat dirstat; - if (stat(tsMgmtDirectory, &dirstat) < 0) { - mkdir(tsMgmtDirectory, 0755); + if (stat(tsMnodeDir, &dirstat) < 0) { + mkdir(tsMnodeDir, 0755); } if (mgmtCheckMgmtRunning() != 0) { @@ -79,11 +65,9 @@ int32_t mgmtStartSystem() { return 0; } - tsMgmtTranQhandle = taosInitScheduler(tsMaxDnodes + tsMaxShellConns, 1, "mnodeT"); - tsMgmtTmr = taosTmrInit((tsMaxDnodes + tsMaxShellConns) * 3, 200, 3600000, "MND"); if (tsMgmtTmr == NULL) { - mError("failed to init timer, exit"); + mError("failed to init timer"); return -1; } @@ -125,12 +109,7 @@ int32_t mgmtStartSystem() { return -1; } - if (mgmtInitShell() < 0) { - mError("failed to init shell"); - return -1; - } - - if (sdbInitPeers(tsMgmtDirectory) < 0) { + if (sdbInitPeers(tsMnodeDir) < 0) { mError("failed to init peers"); return -1; } @@ -139,30 +118,11 @@ int32_t mgmtStartSystem() { mError("failed to init dnode balance") } - mPrint("TDengine mgmt is initialized successfully"); return 0; } -int32_t mgmtInitSystem() { - struct stat dirstat; - bool directoryExist = (stat(tsMgmtDirectory, &dirstat) == 0); - bool equalWithMaster = (strcmp(tsMasterIp, tsPrivateIp) == 0); - - if (equalWithMaster || directoryExist) { - if (mgmtStartSystem() != 0) { - return -1; - } - } - - if (mgmtInitShell() < 0) { - mError("failed to init shell"); - return -1; - } - - return 0; -} void mgmtStopSystem() { if (sdbMaster) { @@ -171,6 +131,31 @@ void mgmtStopSystem() { } mgmtCleanUpSystem(); - remove(tsMgmtDirectory); -// mgmtInitRedirect(); + remove(tsMnodeDir); } + +void mgmtCleanUpSystem() { + mPrint("starting to clean up mgmt"); + sdbCleanUpPeers(); + mgmtCleanupBalance(); + mgmtCleanUpShell(); + mgmtCleanupDClient(); + mgmtCleanupDServer(); + mgmtCleanUpTables(); + 
mgmtCleanUpVgroups(); + mgmtCleanUpDbs(); + mgmtCleanUpDnodes(); + mgmtCleanUpUsers(); + mgmtCleanUpAccts(); + taosTmrCleanUp(tsMgmtTmr); + mPrint("mgmt is cleaned up"); +} + +static int32_t mgmtCheckMgmtRunning() { + if (tsModuleStatus & (1 << TSDB_MOD_MGMT)) { + return -1; + } + + tsetModuleStatus(TSDB_MOD_MGMT); + return 0; +} \ No newline at end of file diff --git a/src/mnode/src/mgmtMnode.c b/src/mnode/src/mgmtMnode.c index 9e1b3f8ceb70a8c37e571dd3aeb54dc83ffe8c2a..f7e6e3f8cb4c984917a30ff7000e7d4373619c3b 100644 --- a/src/mnode/src/mgmtMnode.c +++ b/src/mnode/src/mgmtMnode.c @@ -15,8 +15,161 @@ #define _DEFAULT_SOURCE #include "os.h" +#include "trpc.h" #include "mgmtMnode.h" +#include "mgmtUser.h" + +int32_t (*mgmtAddMnodeFp)(uint32_t privateIp, uint32_t publicIp) = NULL; +int32_t (*mgmtRemoveMnodeFp)(uint32_t privateIp) = NULL; +int32_t (*mgmtGetMnodesNumFp)() = NULL; +void * (*mgmtGetNextMnodeFp)(SShowObj *pShow, SMnodeObj **pMnode) = NULL; + +static SMnodeObj tsMnodeObj = {0}; +static int32_t mgmtGetMnodeMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtRetrieveMnodes(SShowObj *pShow, char *data, int32_t rows, void *pConn); bool mgmtCheckRedirect(void *handle) { - return true; + return false; +} + +int32_t mgmtAddMnode(uint32_t privateIp, uint32_t publicIp) { + if (mgmtAddMnodeFp) { + return (*mgmtAddMnodeFp)(privateIp, publicIp); + } else { + return 0; + } +} + +int32_t mgmtRemoveMnode(uint32_t privateIp) { + if (mgmtRemoveMnodeFp) { + return (*mgmtRemoveMnodeFp)(privateIp); + } else { + return 0; + } +} + +static int32_t mgmtGetMnodesNum() { + if (mgmtGetMnodesNumFp) { + return (*mgmtGetMnodesNumFp)(); + } else { + return 1; + } +} + +static void *mgmtGetNextMnode(SShowObj *pShow, SMnodeObj **pMnode) { + if (mgmtGetNextMnodeFp) { + return (*mgmtGetNextMnodeFp)(pShow, pMnode); + } else { + if (*pMnode == NULL) { + *pMnode = &tsMnodeObj; + } else { + *pMnode = NULL; + } + } + + return *pMnode; +} + +static int32_t 
mgmtGetMnodeMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { + int32_t cols = 0; + + SUserObj *pUser = mgmtGetUserFromConn(pConn); + if (pUser == NULL) return 0; + + if (strcmp(pUser->user, "root") != 0) return TSDB_CODE_NO_RIGHTS; + + SSchema *pSchema = pMeta->schema; + + pShow->bytes[cols] = 16; + pSchema[cols].type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema[cols].name, "private ip"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 8; + pSchema[cols].type = TSDB_DATA_TYPE_TIMESTAMP; + strcpy(pSchema[cols].name, "create time"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 10; + pSchema[cols].type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema[cols].name, "status"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 10; + pSchema[cols].type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema[cols].name, "role"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pShow->bytes[cols] = 16; + pSchema[cols].type = TSDB_DATA_TYPE_BINARY; + strcpy(pSchema[cols].name, "public ip"); + pSchema[cols].bytes = htons(pShow->bytes[cols]); + cols++; + + pMeta->numOfColumns = htons(cols); + pShow->numOfColumns = cols; + + pShow->offset[0] = 0; + for (int32_t i = 1; i < cols; ++i) { + pShow->offset[i] = pShow->offset[i - 1] + pShow->bytes[i - 1]; + } + + pShow->numOfRows = mgmtGetMnodesNum(); + pShow->rowSize = pShow->offset[cols - 1] + pShow->bytes[cols - 1]; + pShow->pNode = NULL; + + return 0; +} + +static int32_t mgmtRetrieveMnodes(SShowObj *pShow, char *data, int32_t rows, void *pConn) { + int32_t numOfRows = 0; + int32_t cols = 0; + SMnodeObj *pMnode = NULL; + char *pWrite; + char ipstr[32]; + + while (numOfRows < rows) { + pShow->pNode = mgmtGetNextMnode(pShow, (SMnodeObj **)&pMnode); + if (pMnode == NULL) break; + + cols = 0; + + tinet_ntoa(ipstr, pMnode->privateIp); + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + 
strcpy(pWrite, ipstr); + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + *(int64_t *)pWrite = pMnode->createdTime; + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + strcpy(pWrite, sdbStatusStr[(uint8_t)pMnode->status]); + cols++; + + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + strcpy(pWrite, sdbRoleStr[(uint8_t)pMnode->role]); + cols++; + + tinet_ntoa(ipstr, pMnode->publicIp); + pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; + strcpy(pWrite, ipstr); + cols++; + + numOfRows++; + } + + pShow->numOfReads += numOfRows; + return numOfRows; +} + +void mgmtGetMnodeIpList(SRpcIpSet *ipSet) { + ipSet->inUse = 0; + ipSet->port = htons(tsMnodeDnodePort); + ipSet->numOfIps = 1; + ipSet->ip[0] = htonl(inet_addr(tsMasterIp)); } \ No newline at end of file diff --git a/src/mnode/src/mgmtNormalTable.c b/src/mnode/src/mgmtNormalTable.c index 3e215888df8900c7006b8c9b2d5a360b5beda4c8..f076b69f36323f08c0e69195d6d1ab08b4ac4150 100644 --- a/src/mnode/src/mgmtNormalTable.c +++ b/src/mnode/src/mgmtNormalTable.c @@ -125,7 +125,6 @@ void *mgmtNormalTableActionDelete(void *row, char *str, int32_t size, int32_t *s SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); if (pVgroup == NULL) { - mError("id:%s not in vgroup:%d", pTable->tableId, pTable->vgId); return NULL; } @@ -224,7 +223,7 @@ int32_t mgmtInitNormalTables() { tsNormalTableUpdateSize = tObj.updateEnd - (int8_t *)&tObj; tsNormalTableSdb = sdbOpenTable(tsMaxTables, sizeof(SNormalTableObj) + sizeof(SSchema) * TSDB_MAX_COLUMNS, - "ntables", SDB_KEYTYPE_STRING, tsMgmtDirectory, mgmtNormalTableAction); + "ntables", SDB_KEYTYPE_STRING, tsMnodeDir, mgmtNormalTableAction); if (tsNormalTableSdb == NULL) { mError("failed to init ntables data"); return -1; @@ -287,35 +286,31 @@ void mgmtCleanUpNormalTables() { sdbCloseTable(tsNormalTableSdb); } -static void 
*mgmtBuildCreateNormalTableMsg(SNormalTableObj *pTable, SVgObj *pVgroup) { +void *mgmtBuildCreateNormalTableMsg(SNormalTableObj *pTable) { int32_t totalCols = pTable->numOfColumns; - int32_t contLen = sizeof(SDMCreateTableMsg) + totalCols * sizeof(SSchema) + pTable->sqlLen; + int32_t contLen = sizeof(SMDCreateTableMsg) + totalCols * sizeof(SSchema) + pTable->sqlLen; - SDMCreateTableMsg *pCreateTable = rpcMallocCont(contLen); - if (pCreateTable == NULL) { + SMDCreateTableMsg *pCreate = rpcMallocCont(contLen); + if (pCreate == NULL) { + terrno = TSDB_CODE_SERV_OUT_OF_MEMORY; return NULL; } - memcpy(pCreateTable->tableId, pTable->tableId, TSDB_TABLE_ID_LEN); - pCreateTable->tableType = pTable->type; - pCreateTable->numOfColumns = htons(pTable->numOfColumns); - pCreateTable->numOfTags = htons(0); - pCreateTable->sid = htonl(pTable->sid); - pCreateTable->sversion = htonl(pTable->sversion); - pCreateTable->tagDataLen = htonl(0); - pCreateTable->sqlDataLen = htonl(pTable->sqlLen); - pCreateTable->contLen = htonl(contLen); - pCreateTable->numOfVPeers = htonl(pVgroup->numOfVnodes); - pCreateTable->uid = htobe64(pTable->uid); - pCreateTable->superTableUid = htobe64(0); - pCreateTable->createdTime = htobe64(pTable->createdTime); - - for (int i = 0; i < pVgroup->numOfVnodes; ++i) { - pCreateTable->vpeerDesc[i].ip = htonl(pVgroup->vnodeGid[i].ip); - pCreateTable->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); - } - - SSchema *pSchema = (SSchema *) pCreateTable->data; + memcpy(pCreate->tableId, pTable->tableId, TSDB_TABLE_ID_LEN + 1); + pCreate->contLen = htonl(contLen); + pCreate->vgId = htonl(pTable->vgId); + pCreate->tableType = pTable->type; + pCreate->numOfColumns = htons(pTable->numOfColumns); + pCreate->numOfTags = 0; + pCreate->sid = htonl(pTable->sid); + pCreate->sversion = htonl(pTable->sversion); + pCreate->tagDataLen = 0; + pCreate->sqlDataLen = htonl(pTable->sqlLen); + pCreate->uid = htobe64(pTable->uid); + pCreate->superTableUid = 0; + pCreate->createdTime 
= htobe64(pTable->createdTime); + + SSchema *pSchema = (SSchema *) pCreate->data; memcpy(pSchema, pTable->schema, totalCols * sizeof(SSchema)); for (int32_t col = 0; col < totalCols; ++col) { pSchema->bytes = htons(pSchema->bytes); @@ -323,125 +318,107 @@ static void *mgmtBuildCreateNormalTableMsg(SNormalTableObj *pTable, SVgObj *pVgr pSchema++; } - memcpy(pCreateTable + sizeof(SDMCreateTableMsg) + totalCols * sizeof(SSchema), pTable->sql, pTable->sqlLen); - - return pCreateTable; + memcpy(pCreate + sizeof(SMDCreateTableMsg) + totalCols * sizeof(SSchema), pTable->sql, pTable->sqlLen); + return pCreate; } -int32_t mgmtCreateNormalTable(SCreateTableMsg *pCreate, int32_t contLen, SVgObj *pVgroup, int32_t sid, - SDMCreateTableMsg **pDCreateOut, STableInfo **pTableOut) { +void *mgmtCreateNormalTable(SCMCreateTableMsg *pCreate, SVgObj *pVgroup, int32_t sid) { int32_t numOfTables = sdbGetNumOfRows(tsNormalTableSdb); if (numOfTables >= TSDB_MAX_NORMAL_TABLES) { mError("table:%s, numOfTables:%d exceed maxTables:%d", pCreate->tableId, numOfTables, TSDB_MAX_NORMAL_TABLES); - return TSDB_CODE_TOO_MANY_TABLES; + terrno = TSDB_CODE_TOO_MANY_TABLES; + return NULL; } SNormalTableObj *pTable = (SNormalTableObj *) calloc(sizeof(SNormalTableObj), 1); if (pTable == NULL) { mError("table:%s, failed to alloc memory", pCreate->tableId); - return TSDB_CODE_SERV_OUT_OF_MEMORY; + terrno = TSDB_CODE_SERV_OUT_OF_MEMORY; + return NULL; } strcpy(pTable->tableId, pCreate->tableId); - pTable->type = TSDB_TABLE_TYPE_NORMAL_TABLE; - pTable->createdTime = taosGetTimestampMs(); + pTable->type = TSDB_NORMAL_TABLE; pTable->vgId = pVgroup->vgId; - pTable->sid = sid; + pTable->createdTime = taosGetTimestampMs(); pTable->uid = (((uint64_t) pTable->createdTime) << 16) + ((uint64_t) sdbGetVersion() & ((1ul << 16) - 1ul)); + pTable->sid = sid; pTable->sversion = 0; - pTable->numOfColumns = pCreate->numOfColumns; - pTable->sqlLen = pTable->sqlLen; + pTable->numOfColumns = htons(pCreate->numOfColumns); + 
pTable->sqlLen = htons(pCreate->sqlLen); - int32_t numOfCols = pCreate->numOfColumns; + int32_t numOfCols = pTable->numOfColumns; int32_t schemaSize = numOfCols * sizeof(SSchema); pTable->schema = (SSchema *) calloc(1, schemaSize); if (pTable->schema == NULL) { free(pTable); - return TSDB_CODE_SERV_OUT_OF_MEMORY; + terrno = TSDB_CODE_SERV_OUT_OF_MEMORY; + return NULL; } memcpy(pTable->schema, pCreate->schema, numOfCols * sizeof(SSchema)); pTable->nextColId = 0; - for (int32_t col = 0; col < pCreate->numOfColumns; col++) { - SSchema *tschema = (SSchema *) pTable->schema; + for (int32_t col = 0; col < numOfCols; col++) { + SSchema *tschema = pTable->schema; tschema[col].colId = pTable->nextColId++; + tschema[col].bytes = htons(tschema[col].bytes); } - pTable->sqlLen = pCreate->sqlLen; if (pTable->sqlLen != 0) { - pTable->type = TSDB_TABLE_TYPE_STREAM_TABLE; + pTable->type = TSDB_STREAM_TABLE; pTable->sql = calloc(1, pTable->sqlLen); if (pTable->sql == NULL) { free(pTable); - return TSDB_CODE_SERV_OUT_OF_MEMORY; + terrno = TSDB_CODE_SERV_OUT_OF_MEMORY; + return NULL; } - memcpy(pTable->sql, (char *) (pCreate->schema) + numOfCols * sizeof(SSchema), pCreate->sqlLen); - pTable->sql[pCreate->sqlLen - 1] = 0; - mTrace("table:%s, stream sql len:%d sql:%s", pCreate->tableId, pCreate->sqlLen, pTable->sql); + memcpy(pTable->sql, (char *) (pCreate->schema) + numOfCols * sizeof(SSchema), pTable->sqlLen); + pTable->sql[pTable->sqlLen - 1] = 0; + mTrace("table:%s, stream sql len:%d sql:%s", pTable->tableId, pTable->sqlLen, pTable->sql); } if (sdbInsertRow(tsNormalTableSdb, pTable, 0) < 0) { - mError("table:%s, update sdb error", pCreate->tableId); - return TSDB_CODE_SDB_ERROR; - } - - *pDCreateOut = mgmtBuildCreateNormalTableMsg(pTable, pVgroup); - if (*pDCreateOut == NULL) { - mError("table:%s, failed to build create table message", pCreate->tableId); - return TSDB_CODE_SERV_OUT_OF_MEMORY; + mError("table:%s, update sdb error", pTable->tableId); + free(pTable); + terrno = 
TSDB_CODE_SDB_ERROR; + return NULL; } - *pTableOut = (STableInfo *) pTable; - - mTrace("table:%s, create table in vgroup, vgroup:%d sid:%d vnode:%d uid:%" PRIu64 , - pTable->tableId, pVgroup->vgId, sid, pVgroup->vnodeGid[0].vnode, pTable->uid); - - return TSDB_CODE_SUCCESS; + mTrace("table:%s, create ntable in vgroup, uid:%" PRIu64 , pTable->tableId, pTable->uid); + return pTable; } -int32_t mgmtDropNormalTable(SDbObj *pDb, SNormalTableObj *pTable) { +int32_t mgmtDropNormalTable(SQueuedMsg *newMsg, SNormalTableObj *pTable) { SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); if (pVgroup == NULL) { mError("table:%s, failed to drop normal table, vgroup not exist", pTable->tableId); return TSDB_CODE_OTHERS; } - SMDDropTableMsg *pRemove = rpcMallocCont(sizeof(SMDDropTableMsg)); - if (pRemove == NULL) { + SMDDropTableMsg *pDrop = rpcMallocCont(sizeof(SMDDropTableMsg)); + if (pDrop == NULL) { mError("table:%s, failed to drop normal table, no enough memory", pTable->tableId); return TSDB_CODE_SERV_OUT_OF_MEMORY; } - strcpy(pRemove->tableId, pTable->tableId); - pRemove->sid = htonl(pTable->sid); - pRemove->uid = htobe64(pTable->uid); - - pRemove->numOfVPeers = htonl(pVgroup->numOfVnodes); - for (int i = 0; i < pVgroup->numOfVnodes; ++i) { - pRemove->vpeerDesc[i].ip = htonl(pVgroup->vnodeGid[i].ip); - pRemove->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); - } + strcpy(pDrop->tableId, pTable->tableId); + pDrop->contLen = htonl(sizeof(SMDDropTableMsg)); + pDrop->vgId = htonl(pVgroup->vgId); + pDrop->sid = htonl(pTable->sid); + pDrop->uid = htobe64(pTable->uid); SRpcIpSet ipSet = mgmtGetIpSetFromVgroup(pVgroup); - mTrace("table:%s, send drop table msg", pRemove->tableId); + mTrace("table:%s, send drop table msg", pDrop->tableId); SRpcMsg rpcMsg = { - .handle = 0, - .pCont = pRemove, + .handle = newMsg, + .pCont = pDrop, .contLen = sizeof(SMDDropTableMsg), .code = 0, .msgType = TSDB_MSG_TYPE_MD_DROP_TABLE }; - mgmtSendMsgToDnode(&ipSet, &rpcMsg); - - if 
(sdbDeleteRow(tsNormalTableSdb, pTable) < 0) { - mError("table:%s, update ntables sdb error", pTable->tableId); - return TSDB_CODE_SDB_ERROR; - } - - if (pVgroup->numOfTables <= 0) { - mgmtDropVgroup(pDb, pVgroup); - } + newMsg->ahandle = pTable; + mgmtSendMsgToDnode(&ipSet, &rpcMsg); return TSDB_CODE_SUCCESS; } @@ -544,16 +521,18 @@ static int32_t mgmtSetSchemaFromNormalTable(SSchema *pSchema, SNormalTableObj *p return numOfCols * sizeof(SSchema); } -int32_t mgmtGetNormalTableMeta(SDbObj *pDb, SNormalTableObj *pTable, STableMeta *pMeta, bool usePublicIp) { +int32_t mgmtGetNormalTableMeta(SDbObj *pDb, SNormalTableObj *pTable, STableMetaMsg *pMeta, bool usePublicIp) { pMeta->uid = htobe64(pTable->uid); pMeta->sid = htonl(pTable->sid); - pMeta->vgid = htonl(pTable->vgId); + pMeta->vgId = htonl(pTable->vgId); pMeta->sversion = htons(pTable->sversion); pMeta->precision = pDb->cfg.precision; pMeta->numOfTags = 0; pMeta->numOfColumns = htons(pTable->numOfColumns); pMeta->tableType = pTable->type; - pMeta->contLen = sizeof(STableMeta) + mgmtSetSchemaFromNormalTable(pMeta->schema, pTable); + pMeta->contLen = sizeof(STableMetaMsg) + mgmtSetSchemaFromNormalTable(pMeta->schema, pTable); + + strncpy(pMeta->tableId, pTable->tableId, tListLen(pTable->tableId)); SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); if (pVgroup == NULL) { @@ -562,14 +541,35 @@ int32_t mgmtGetNormalTableMeta(SDbObj *pDb, SNormalTableObj *pTable, STableMeta for (int32_t i = 0; i < TSDB_VNODES_SUPPORT; ++i) { if (usePublicIp) { pMeta->vpeerDesc[i].ip = pVgroup->vnodeGid[i].publicIp; - pMeta->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); } else { - pMeta->vpeerDesc[i].ip = pVgroup->vnodeGid[i].ip; - pMeta->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); + pMeta->vpeerDesc[i].ip = pVgroup->vnodeGid[i].privateIp; } + + pMeta->vpeerDesc[i].vnode = htonl(pVgroup->vnodeGid[i].vnode); } pMeta->numOfVpeers = pVgroup->numOfVnodes; return TSDB_CODE_SUCCESS; } +void mgmtDropAllNormalTables(SDbObj 
*pDropDb) { + void *pNode = NULL; + void *pLastNode = NULL; + int32_t numOfTables = 0; + int32_t dbNameLen = strlen(pDropDb->name); + SNormalTableObj *pTable = NULL; + + while (1) { + pNode = sdbFetchRow(tsNormalTableSdb, pNode, (void **)&pTable); + if (pTable == NULL) break; + + if (strncmp(pDropDb->name, pTable->tableId, dbNameLen) == 0) { + sdbDeleteRow(tsNormalTableSdb, pTable); + pNode = pLastNode; + numOfTables ++; + continue; + } + } + + mTrace("db:%s, all normal tables:%d is dropped", pDropDb->name, numOfTables); +} diff --git a/src/mnode/src/mgmtProfile.c b/src/mnode/src/mgmtProfile.c index eafa860596d9c5b3398237ebce82f8bf0c6464e5..b20e66dd874b6b589087b37e66f463e5d32aee89 100644 --- a/src/mnode/src/mgmtProfile.c +++ b/src/mnode/src/mgmtProfile.c @@ -16,13 +16,12 @@ #define _DEFAULT_SOURCE #include "os.h" #include "taosmsg.h" -#include "tschemautil.h" #include "mgmtMnode.h" #include "mgmtProfile.h" #include "mgmtShell.h" #include "mgmtUser.h" -int32_t mgmtSaveQueryStreamList(SHeartBeatMsg *pHBMsg); +int32_t mgmtSaveQueryStreamList(SCMHeartBeatMsg *pHBMsg); int32_t mgmtKillQuery(char *qidstr, void *pConn); int32_t mgmtKillStream(char *qidstr, void *pConn); @@ -63,7 +62,7 @@ typedef struct { SStreamDesc sdesc[]; } SStreamShow; -int32_t mgmtSaveQueryStreamList(SHeartBeatMsg *pHBMsg) { +int32_t mgmtSaveQueryStreamList(SCMHeartBeatMsg *pHBMsg) { // SAcctObj *pAcct = pConn->pAcct; // // if (contLen <= 0 || pAcct == NULL) { @@ -137,10 +136,10 @@ int32_t mgmtGetQueries(SShowObj *pShow, void *pConn) { return 0; } -int32_t mgmtGetQueryMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +int32_t mgmtGetQueryMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { int32_t cols = 0; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = TSDB_USER_LEN; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; @@ -336,9 +335,9 @@ int32_t mgmtGetStreams(SShowObj *pShow, void *pConn) { return 0; } -int32_t mgmtGetStreamMeta(STableMeta 
*pMeta, SShowObj *pShow, void *pConn) { +int32_t mgmtGetStreamMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { int32_t cols = 0; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = TSDB_USER_LEN; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; @@ -558,9 +557,11 @@ bool mgmtCheckQhandle(uint64_t qhandle) { } void mgmtSaveQhandle(void *qhandle) { + mTrace("qhandle:%p is allocated", qhandle); } void mgmtFreeQhandle(void *qhandle) { + mTrace("qhandle:%p is freed", qhandle); } int mgmtGetConns(SShowObj *pShow, void *pConn) { @@ -598,11 +599,11 @@ int mgmtGetConns(SShowObj *pShow, void *pConn) { return 0; } -int32_t mgmtGetConnsMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +int32_t mgmtGetConnsMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { int32_t cols = 0; pShow->bytes[cols] = TSDB_TABLE_NAME_LEN; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; strcpy(pSchema[cols].name, "user"); @@ -673,72 +674,72 @@ int32_t mgmtRetrieveConns(SShowObj *pShow, char *data, int32_t rows, void *pConn return numOfRows; } -void mgmtProcessKillQueryMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; +void mgmtProcessKillQueryMsg(SQueuedMsg *pMsg) { + SRpcMsg rpcRsp = {.handle = pMsg->thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + if (mgmtCheckRedirect(pMsg->thandle)) return; - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); + SUserObj *pUser = mgmtGetUserFromConn(pMsg->thandle); if (pUser == NULL) { rpcRsp.code = TSDB_CODE_INVALID_USER; rpcSendResponse(&rpcRsp); return; } - SKillQueryMsg *pKill = (SKillQueryMsg *) rpcMsg->pCont; + SCMKillQueryMsg *pKill = pMsg->pCont; int32_t code; if (!pUser->writeAuth) { code = TSDB_CODE_NO_RIGHTS; } else { - code = mgmtKillQuery(pKill->queryId, rpcMsg->handle); + code 
= mgmtKillQuery(pKill->queryId, pMsg->thandle); } rpcRsp.code = code; rpcSendResponse(&rpcRsp); } -void mgmtProcessKillStreamMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; +void mgmtProcessKillStreamMsg(SQueuedMsg *pMsg) { + SRpcMsg rpcRsp = {.handle = pMsg->thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + if (mgmtCheckRedirect(pMsg->thandle)) return; - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); + SUserObj *pUser = mgmtGetUserFromConn(pMsg->thandle); if (pUser == NULL) { rpcRsp.code = TSDB_CODE_INVALID_USER; rpcSendResponse(&rpcRsp); return; } - SKillStreamMsg *pKill = (SKillStreamMsg *) rpcMsg->pCont; + SCMKillStreamMsg *pKill = pMsg->pCont; int32_t code; if (!pUser->writeAuth) { code = TSDB_CODE_NO_RIGHTS; } else { - code = mgmtKillStream(pKill->queryId, rpcMsg->handle); + code = mgmtKillStream(pKill->queryId, pMsg->thandle); } rpcRsp.code = code; rpcSendResponse(&rpcRsp); } -void mgmtProcessKillConnectionMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; +void mgmtProcessKillConnectionMsg(SQueuedMsg *pMsg) { + SRpcMsg rpcRsp = {.handle = pMsg->thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + if (mgmtCheckRedirect(pMsg->thandle)) return; - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); + SUserObj *pUser = mgmtGetUserFromConn(pMsg->thandle); if (pUser == NULL) { rpcRsp.code = TSDB_CODE_INVALID_USER; rpcSendResponse(&rpcRsp); return; } - SKillConnectionMsg *pKill = (SKillConnectionMsg *) rpcMsg->pCont; + SCMKillConnMsg *pKill = pMsg->pCont; int32_t code; if (!pUser->writeAuth) { code = TSDB_CODE_NO_RIGHTS; } else { - code = mgmtKillConnection(pKill->queryId, rpcMsg->handle); + code = mgmtKillConnection(pKill->queryId, pMsg->thandle); } rpcRsp.code = code; @@ -752,9 
+753,9 @@ int32_t mgmtInitProfile() { mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_CONNS, mgmtRetrieveConns); mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_STREAMS, mgmtGetStreamMeta); mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_STREAMS, mgmtRetrieveStreams); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_KILL_QUERY, mgmtProcessKillQueryMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_KILL_STREAM, mgmtProcessKillStreamMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_KILL_CONNECTION, mgmtProcessKillConnectionMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_KILL_QUERY, mgmtProcessKillQueryMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_KILL_STREAM, mgmtProcessKillStreamMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_KILL_CONN, mgmtProcessKillConnectionMsg); return 0; } diff --git a/src/mnode/src/mgmtShell.c b/src/mnode/src/mgmtShell.c index be8de5b33142eecacdf7f02de20441ba7c1d9b6b..9cf0e3b57338f113996579d9cb129013691fdea9 100644 --- a/src/mnode/src/mgmtShell.c +++ b/src/mnode/src/mgmtShell.c @@ -21,7 +21,7 @@ #include "trpc.h" #include "tstatus.h" #include "tsched.h" -#include "dnodeSystem.h" +#include "dnode.h" #include "mnode.h" #include "mgmtAcct.h" #include "mgmtBalance.h" @@ -38,26 +38,32 @@ #include "mgmtUser.h" #include "mgmtVgroup.h" -typedef int32_t (*SShowMetaFp)(STableMeta *pMeta, SShowObj *pShow, void *pConn); +typedef int32_t (*SShowMetaFp)(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); typedef int32_t (*SShowRetrieveFp)(SShowObj *pShow, char *data, int32_t rows, void *pConn); -static void mgmtProcessMsgFromShell(SRpcMsg *pMsg); -static void mgmtProcessShowMsg(SRpcMsg *rpcMsg); -static void mgmtProcessRetrieveMsg(SRpcMsg *rpcMsg); -static void mgmtProcessUnSupportMsg(SRpcMsg *rpcMsg); static int mgmtShellRetriveAuth(char *user, char *spi, char *encrypt, char *secret, char *ckey); static bool mgmtCheckMsgReadOnly(int8_t type, void *pCont); -static void mgmtProcessHeartBeatMsg(SRpcMsg *rpcMsg); -static void mgmtProcessConnectMsg(SRpcMsg *rpcMsg); +static void 
mgmtProcessMsgFromShell(SRpcMsg *pMsg); +static void mgmtProcessUnSupportMsg(SRpcMsg *rpcMsg); +static void mgmtProcessMsgWhileNotReady(SRpcMsg *rpcMsg); +static void mgmtProcessShowMsg(SQueuedMsg *queuedMsg); +static void mgmtProcessRetrieveMsg(SQueuedMsg *queuedMsg); +static void mgmtProcessHeartBeatMsg(SQueuedMsg *queuedMsg); +static void mgmtProcessConnectMsg(SQueuedMsg *queuedMsg); static void *tsMgmtShellRpc = NULL; -static void (*tsMgmtProcessShellMsgFp[TSDB_MSG_TYPE_MAX])(SRpcMsg *) = {0}; +static void *tsMgmtTranQhandle = NULL; +static void (*tsMgmtProcessShellMsgFp[TSDB_MSG_TYPE_MAX])(SQueuedMsg *) = {0}; static SShowMetaFp tsMgmtShowMetaFp[TSDB_MGMT_TABLE_MAX] = {0}; static SShowRetrieveFp tsMgmtShowRetrieveFp[TSDB_MGMT_TABLE_MAX] = {0}; int32_t mgmtInitShell() { - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_SHOW, mgmtProcessShowMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_SHOW, mgmtProcessShowMsg); mgmtAddShellMsgHandle(TSDB_MSG_TYPE_RETRIEVE, mgmtProcessRetrieveMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_HEARTBEAT, mgmtProcessHeartBeatMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_CONNECT, mgmtProcessConnectMsg); + + tsMgmtTranQhandle = taosInitScheduler(tsMaxDnodes + tsMaxShellConns, 1, "mnodeT"); int32_t numOfThreads = tsNumOfCores * tsNumOfThreadsPerCore / 4.0; if (numOfThreads < 1) { @@ -66,7 +72,7 @@ int32_t mgmtInitShell() { SRpcInit rpcInit = {0}; rpcInit.localIp = tsAnyIp ? 
"0.0.0.0" : tsPrivateIp; - rpcInit.localPort = tsMgmtShellPort; + rpcInit.localPort = tsMnodeShellPort; rpcInit.label = "MND-shell"; rpcInit.numOfThreads = numOfThreads; rpcInit.cfp = mgmtProcessMsgFromShell; @@ -81,14 +87,16 @@ int32_t mgmtInitShell() { return -1; } - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_HEARTBEAT, mgmtProcessHeartBeatMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CONNECT, mgmtProcessConnectMsg); - mPrint("server connection to shell is opened"); return 0; } void mgmtCleanUpShell() { + if (tsMgmtTranQhandle) { + taosCleanUpScheduler(tsMgmtTranQhandle); + tsMgmtTranQhandle = NULL; + } + if (tsMgmtShellRpc) { rpcClose(tsMgmtShellRpc); tsMgmtShellRpc = NULL; @@ -96,7 +104,7 @@ void mgmtCleanUpShell() { } } -void mgmtAddShellMsgHandle(uint8_t showType, void (*fp)(SRpcMsg *rpcMsg)) { +void mgmtAddShellMsgHandle(uint8_t showType, void (*fp)(SQueuedMsg *queuedMsg)) { tsMgmtProcessShellMsgFp[showType] = fp; } @@ -109,102 +117,118 @@ void mgmtAddShellShowRetrieveHandle(uint8_t msgType, SShowRetrieveFp fp) { } void mgmtProcessTranRequest(SSchedMsg *sched) { - SRpcMsg *rpcMsg = sched->msg; - (*tsMgmtProcessShellMsgFp[rpcMsg->msgType])(rpcMsg); - rpcFreeCont(rpcMsg->pCont); + SQueuedMsg *queuedMsg = sched->msg; + (*tsMgmtProcessShellMsgFp[queuedMsg->msgType])(queuedMsg); + rpcFreeCont(queuedMsg->pCont); + free(queuedMsg); } -void mgmtAddToTranRequest(SRpcMsg *rpcMsg) { +void mgmtAddToShellQueue(SQueuedMsg *queuedMsg) { SSchedMsg schedMsg; - schedMsg.msg = rpcMsg; - schedMsg.fp = mgmtProcessTranRequest; + schedMsg.msg = queuedMsg; + schedMsg.fp = mgmtProcessTranRequest; taosScheduleTask(tsMgmtTranQhandle, &schedMsg); } static void mgmtProcessMsgFromShell(SRpcMsg *rpcMsg) { if (sdbGetRunStatus() != SDB_STATUS_SERVING) { - mTrace("shell msg is ignored since SDB is not ready"); - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = TSDB_CODE_NOT_READY, .msgType = 0}; - rpcSendResponse(&rpcRsp); + mgmtProcessMsgWhileNotReady(rpcMsg); 
rpcFreeCont(rpcMsg->pCont); return; } - if (tsMgmtProcessShellMsgFp[rpcMsg->msgType]) { - if (mgmtCheckMsgReadOnly(rpcMsg->msgType, rpcMsg->pCont)) { - (*tsMgmtProcessShellMsgFp[rpcMsg->msgType])(rpcMsg); - rpcFreeCont(rpcMsg->pCont); - } else { - mgmtAddToTranRequest(rpcMsg); - } - } else { - mError("%s is not processed", taosMsg[rpcMsg->msgType]); + if (tsMgmtProcessShellMsgFp[rpcMsg->msgType] == NULL) { mgmtProcessUnSupportMsg(rpcMsg); rpcFreeCont(rpcMsg->pCont); + return; + } + + SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); + if (pUser == NULL) { + mgmtSendSimpleResp(rpcMsg->handle, TSDB_CODE_INVALID_USER); + rpcFreeCont(rpcMsg->pCont); + return; } -} -static void mgmtProcessShowMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + if (mgmtCheckMsgReadOnly(rpcMsg->msgType, rpcMsg->pCont)) { + SQueuedMsg queuedMsg = {0}; + queuedMsg.thandle = rpcMsg->handle; + queuedMsg.msgType = rpcMsg->msgType; + queuedMsg.contLen = rpcMsg->contLen; + queuedMsg.pCont = rpcMsg->pCont; + queuedMsg.pUser = pUser; + (*tsMgmtProcessShellMsgFp[rpcMsg->msgType])(&queuedMsg); + rpcFreeCont(rpcMsg->pCont); + } else { + SQueuedMsg *queuedMsg = calloc(1, sizeof(SQueuedMsg)); + queuedMsg->thandle = rpcMsg->handle; + queuedMsg->msgType = rpcMsg->msgType; + queuedMsg->contLen = rpcMsg->contLen; + queuedMsg->pCont = rpcMsg->pCont; + queuedMsg->pUser = pUser; + mgmtAddToShellQueue(queuedMsg); + } +} - SShowMsg *pShowMsg = rpcMsg->pCont; +static void mgmtProcessShowMsg(SQueuedMsg *pMsg) { + SCMShowMsg *pShowMsg = pMsg->pCont; if (pShowMsg->type == TSDB_MGMT_TABLE_DNODE || TSDB_MGMT_TABLE_GRANTS || TSDB_MGMT_TABLE_SCORES) { - if (mgmtCheckRedirect(rpcMsg->handle) != TSDB_CODE_SUCCESS) { + if (mgmtCheckRedirect(pMsg->thandle)) { return; } } - int32_t size = sizeof(SShowRsp) + sizeof(SSchema) * TSDB_MAX_COLUMNS + TSDB_EXTRA_PAYLOAD_SIZE; - SShowRsp *pShowRsp = rpcMallocCont(size); + if (pShowMsg->type >= 
TSDB_MGMT_TABLE_MAX) { + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_MSG_TYPE); + return; + } + + if (!tsMgmtShowMetaFp[pShowMsg->type]) { + mError("show type:%s is not support", taosGetShowTypeStr(pShowMsg->type)); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_OPS_NOT_SUPPORT); + return; + } + + int32_t size = sizeof(SCMShowRsp) + sizeof(SSchema) * TSDB_MAX_COLUMNS + TSDB_EXTRA_PAYLOAD_SIZE; + SCMShowRsp *pShowRsp = rpcMallocCont(size); if (pShowRsp == NULL) { - rpcRsp.code = TSDB_CODE_SERV_OUT_OF_MEMORY; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_SERV_OUT_OF_MEMORY); return; } - int32_t code; - if (pShowMsg->type >= TSDB_MGMT_TABLE_MAX) { - code = TSDB_CODE_INVALID_MSG_TYPE; + SShowObj *pShow = (SShowObj *) calloc(1, sizeof(SShowObj) + htons(pShowMsg->payloadLen)); + pShow->signature = pShow; + pShow->type = pShowMsg->type; + pShow->payloadLen = htons(pShowMsg->payloadLen); + strcpy(pShow->db, pShowMsg->db); + memcpy(pShow->payload, pShowMsg->payload, pShow->payloadLen); + + mgmtSaveQhandle(pShow); + pShowRsp->qhandle = htobe64((uint64_t) pShow); + + mTrace("show:%p, type:%s, start to get meta", pShow, taosGetShowTypeStr(pShowMsg->type)); + int32_t code = (*tsMgmtShowMetaFp[pShowMsg->type])(&pShowRsp->tableMeta, pShow, pMsg->thandle); + if (code == 0) { + SRpcMsg rpcRsp = { + .handle = pMsg->thandle, + .pCont = pShowRsp, + .contLen = sizeof(SCMShowRsp) + sizeof(SSchema) * pShow->numOfColumns, + .code = code, + .msgType = 0 + }; + rpcSendResponse(&rpcRsp); } else { - SShowObj *pShow = (SShowObj *) calloc(1, sizeof(SShowObj) + htons(pShowMsg->payloadLen)); - pShow->signature = pShow; - pShow->type = pShowMsg->type; - strcpy(pShow->db, pShowMsg->db); - mTrace("pShow:%p is allocated", pShow); - - // set the table name query condition - pShow->payloadLen = htons(pShowMsg->payloadLen); - memcpy(pShow->payload, pShowMsg->payload, pShow->payloadLen); - - mgmtSaveQhandle(pShow); - pShowRsp->qhandle = htobe64((uint64_t) pShow); - if 
(tsMgmtShowMetaFp[pShowMsg->type]) { - code = (*tsMgmtShowMetaFp[(uint8_t) pShowMsg->type])(&pShowRsp->tableMeta, pShow, rpcMsg->handle); - if (code == 0) { - size = sizeof(SShowRsp) + sizeof(SSchema) * pShow->numOfColumns; - } else { - mError("pShow:%p, type:%d %s, failed to get Meta, code:%d", pShow, pShowMsg->type, - taosMsg[(uint8_t) pShowMsg->type], code); - free(pShow); - } - } else { - code = TSDB_CODE_OPS_NOT_SUPPORT; - } + mError("show:%p, type:%s, failed to get meta, reason:%s", pShow, taosGetShowTypeStr(pShowMsg->type), tstrerror(code)); + mgmtFreeQhandle(pShow); + rpcFreeCont(pShowRsp); } - - rpcRsp.code = code; - rpcRsp.pCont = pShowRsp; - rpcRsp.contLen = size; - rpcSendResponse(&rpcRsp); } -static void mgmtProcessRetrieveMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - +static void mgmtProcessRetrieveMsg(SQueuedMsg *pMsg) { int32_t rowsToRead = 0; int32_t size = 0; int32_t rowsRead = 0; - SRetrieveTableMsg *pRetrieve = (SRetrieveTableMsg *) rpcMsg->pCont; + SRetrieveTableMsg *pRetrieve = pMsg->pCont; pRetrieve->qhandle = htobe64(pRetrieve->qhandle); /* @@ -213,16 +237,16 @@ static void mgmtProcessRetrieveMsg(SRpcMsg *rpcMsg) { */ if (!mgmtCheckQhandle(pRetrieve->qhandle)) { mError("retrieve:%p, qhandle:%p is invalid", pRetrieve, pRetrieve->qhandle); - rpcRsp.code = TSDB_CODE_INVALID_QHANDLE; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_QHANDLE); return; } SShowObj *pShow = (SShowObj *)pRetrieve->qhandle; - if (pShow->signature != (void *)pShow) { - mError("pShow:%p, signature:%p, query memory is corrupted", pShow, pShow->signature); - rpcRsp.code = TSDB_CODE_MEMORY_CORRUPTED; - rpcSendResponse(&rpcRsp); + mTrace("show:%p, type:%s, retrieve data", pShow, taosGetShowTypeStr(pShow->type)); + + if (!mgmtCheckQhandle(pRetrieve->qhandle)) { + mError("pShow:%p, query memory is corrupted", pShow); + mgmtSendSimpleResp(pMsg->thandle, 
TSDB_CODE_MEMORY_CORRUPTED); return; } else { if ((pRetrieve->free & TSDB_QUERY_TYPE_FREE_RESOURCE) != TSDB_QUERY_TYPE_FREE_RESOURCE) { @@ -245,10 +269,9 @@ static void mgmtProcessRetrieveMsg(SRpcMsg *rpcMsg) { // if free flag is set, client wants to clean the resources if ((pRetrieve->free & TSDB_QUERY_TYPE_FREE_RESOURCE) != TSDB_QUERY_TYPE_FREE_RESOURCE) - rowsRead = (*tsMgmtShowRetrieveFp[(uint8_t) pShow->type])(pShow, pRsp->data, rowsToRead, rpcMsg->handle); + rowsRead = (*tsMgmtShowRetrieveFp[pShow->type])(pShow, pRsp->data, rowsToRead, pMsg->thandle); - if (rowsRead < 0) { - rowsRead = 0; // TSDB_CODE_ACTION_IN_PROGRESS; + if (rowsRead < 0) { // TSDB_CODE_ACTION_IN_PROGRESS; rpcFreeCont(pRsp); return; } @@ -256,8 +279,13 @@ static void mgmtProcessRetrieveMsg(SRpcMsg *rpcMsg) { pRsp->numOfRows = htonl(rowsRead); pRsp->precision = htonl(TSDB_TIME_PRECISION_MILLI); // millisecond time precision - rpcRsp.pCont = pRsp; - rpcRsp.contLen = size; + SRpcMsg rpcRsp = { + .handle = pMsg->thandle, + .pCont = pRsp, + .contLen = size, + .code = 0, + .msgType = 0 + }; rpcSendResponse(&rpcRsp); if (rowsToRead == 0) { @@ -265,23 +293,24 @@ static void mgmtProcessRetrieveMsg(SRpcMsg *rpcMsg) { } } -static void mgmtProcessHeartBeatMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - //SHeartBeatMsg *pHBMsg = (SHeartBeatMsg *) rpcMsg->pCont; +static void mgmtProcessHeartBeatMsg(SQueuedMsg *pMsg) { + //SCMHeartBeatMsg *pHBMsg = (SCMHeartBeatMsg *) rpcMsg->pCont; //mgmtSaveQueryStreamList(pHBMsg); - SHeartBeatRsp *pHBRsp = (SHeartBeatRsp *) rpcMallocCont(sizeof(SHeartBeatRsp)); + SCMHeartBeatRsp *pHBRsp = (SCMHeartBeatRsp *) rpcMallocCont(sizeof(SCMHeartBeatRsp)); if (pHBRsp == NULL) { - rpcRsp.code = TSDB_CODE_SERV_OUT_OF_MEMORY; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_SERV_OUT_OF_MEMORY); return; } SRpcConnInfo connInfo; - rpcGetConnInfo(rpcMsg->handle, &connInfo); + if 
(rpcGetConnInfo(pMsg->thandle, &connInfo) != 0) { + mError("conn:%p is already released while process heart beat msg", pMsg->thandle); + return; + } pHBRsp->ipList.inUse = 0; - pHBRsp->ipList.port = htons(tsMgmtShellPort); + pHBRsp->ipList.port = htons(tsMnodeShellPort); pHBRsp->ipList.numOfIps = 0; if (pSdbPublicIpList != NULL && pSdbIpList != NULL) { pHBRsp->ipList.numOfIps = htons(pSdbPublicIpList->numOfIps); @@ -304,8 +333,13 @@ static void mgmtProcessHeartBeatMsg(SRpcMsg *rpcMsg) { pHBRsp->streamId = 0; pHBRsp->killConnection = 0; - rpcRsp.pCont = pHBRsp; - rpcRsp.contLen = sizeof(SHeartBeatRsp); + SRpcMsg rpcRsp = { + .handle = pMsg->thandle, + .pCont = pHBRsp, + .contLen = sizeof(SCMHeartBeatRsp), + .code = 0, + .msgType = 0 + }; rpcSendResponse(&rpcRsp); } @@ -324,14 +358,17 @@ static int mgmtShellRetriveAuth(char *user, char *spi, char *encrypt, char *secr } } -static void mgmtProcessConnectMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - SConnectMsg *pConnectMsg = (SConnectMsg *) rpcMsg->pCont; +static void mgmtProcessConnectMsg(SQueuedMsg *pMsg) { + SRpcMsg rpcRsp = {.handle = pMsg->thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + SCMConnectMsg *pConnectMsg = pMsg->pCont; SRpcConnInfo connInfo; - rpcGetConnInfo(rpcMsg->handle, &connInfo); - int32_t code; + if (rpcGetConnInfo(pMsg->thandle, &connInfo) != 0) { + mError("thandle:%p is already released while process connect msg", pMsg->thandle); + return; + } + int32_t code; SUserObj *pUser = mgmtGetUser(connInfo.user); if (pUser == NULL) { code = TSDB_CODE_INVALID_USER; @@ -355,7 +392,7 @@ static void mgmtProcessConnectMsg(SRpcMsg *rpcMsg) { } if (pConnectMsg->db[0]) { - char dbName[TSDB_TABLE_ID_LEN] = {0}; + char dbName[TSDB_TABLE_ID_LEN * 3] = {0}; sprintf(dbName, "%x%s%s", pAcct->acctId, TS_PATH_DELIMITER, pConnectMsg->db); SDbObj *pDb = mgmtGetDb(dbName); if (pDb == NULL) { @@ -364,7 +401,7 @@ static void 
mgmtProcessConnectMsg(SRpcMsg *rpcMsg) { } } - SConnectRsp *pConnectRsp = rpcMallocCont(sizeof(SConnectRsp)); + SCMConnectRsp *pConnectRsp = rpcMallocCont(sizeof(SCMConnectRsp)); if (pConnectRsp == NULL) { code = TSDB_CODE_SERV_OUT_OF_MEMORY; goto connect_over; @@ -375,7 +412,7 @@ static void mgmtProcessConnectMsg(SRpcMsg *rpcMsg) { pConnectRsp->writeAuth = pUser->writeAuth; pConnectRsp->superAuth = pUser->superAuth; pConnectRsp->ipList.inUse = 0; - pConnectRsp->ipList.port = htons(tsMgmtShellPort); + pConnectRsp->ipList.port = htons(tsMnodeShellPort); pConnectRsp->ipList.numOfIps = 0; if (pSdbPublicIpList != NULL && pSdbIpList != NULL) { pConnectRsp->ipList.numOfIps = htons(pSdbPublicIpList->numOfIps); @@ -397,7 +434,7 @@ connect_over: } else { mLPrint("user:%s login from %s, code:%d", connInfo.user, taosIpStr(connInfo.clientIp), code); rpcRsp.pCont = pConnectRsp; - rpcRsp.contLen = sizeof(SConnectRsp); + rpcRsp.contLen = sizeof(SCMConnectRsp); } rpcSendResponse(&rpcRsp); } @@ -406,7 +443,7 @@ connect_over: * check if we need to add mgmtProcessTableMetaMsg into tranQueue, which will be executed one-by-one. 
*/ static bool mgmtCheckMeterMetaMsgType(void *pMsg) { - STableInfoMsg *pInfo = (STableInfoMsg *) pMsg; + SCMTableInfoMsg *pInfo = (SCMTableInfoMsg *) pMsg; int16_t autoCreate = htons(pInfo->createFlag); STableInfo *pTable = mgmtGetTable(pInfo->tableId); @@ -420,10 +457,10 @@ static bool mgmtCheckMeterMetaMsgType(void *pMsg) { } static bool mgmtCheckMsgReadOnly(int8_t type, void *pCont) { - if ((type == TSDB_MSG_TYPE_TABLE_META && (!mgmtCheckMeterMetaMsgType(pCont))) || - type == TSDB_MSG_TYPE_STABLE_META || type == TSDB_MSG_TYPE_RETRIEVE || - type == TSDB_MSG_TYPE_SHOW || type == TSDB_MSG_TYPE_MULTI_TABLE_META || - type == TSDB_MSG_TYPE_CONNECT) { + if ((type == TSDB_MSG_TYPE_CM_TABLE_META && (!mgmtCheckMeterMetaMsgType(pCont))) || + type == TSDB_MSG_TYPE_CM_STABLE_META || type == TSDB_MSG_TYPE_RETRIEVE || + type == TSDB_MSG_TYPE_CM_SHOW || type == TSDB_MSG_TYPE_CM_TABLES_META || + type == TSDB_MSG_TYPE_CM_CONNECT) { return true; } @@ -431,6 +468,7 @@ static bool mgmtCheckMsgReadOnly(int8_t type, void *pCont) { } static void mgmtProcessUnSupportMsg(SRpcMsg *rpcMsg) { + mError("%s is not processed in shell", taosMsg[rpcMsg->msgType]); SRpcMsg rpcRsp = { .msgType = 0, .pCont = 0, @@ -440,3 +478,26 @@ static void mgmtProcessUnSupportMsg(SRpcMsg *rpcMsg) { }; rpcSendResponse(&rpcRsp); } + +static void mgmtProcessMsgWhileNotReady(SRpcMsg *rpcMsg) { + mTrace("%s is ignored since SDB is not ready", taosMsg[rpcMsg->msgType]); + SRpcMsg rpcRsp = { + .msgType = 0, + .pCont = 0, + .contLen = 0, + .code = TSDB_CODE_NOT_READY, + .handle = rpcMsg->handle + }; + rpcSendResponse(&rpcRsp); +} + +void mgmtSendSimpleResp(void *thandle, int32_t code) { + SRpcMsg rpcRsp = { + .msgType = 0, + .pCont = 0, + .contLen = 0, + .code = code, + .handle = thandle + }; + rpcSendResponse(&rpcRsp); +} diff --git a/src/mnode/src/mgmtSuperTable.c b/src/mnode/src/mgmtSuperTable.c index 1d7db53a5e71191068f77437f4c4039b034f6618..bd697ade93288917e38bb1388049d98717bdd9c3 100644 --- 
a/src/mnode/src/mgmtSuperTable.c +++ b/src/mnode/src/mgmtSuperTable.c @@ -15,15 +15,7 @@ #define _DEFAULT_SOURCE #include "os.h" -#include "taosmsg.h" -#include "tschemautil.h" -#include "tscompression.h" -#include "tskiplist.h" -#include "tsqlfunction.h" -#include "ttime.h" -#include "tstatus.h" -#include "tutil.h" -#include "mnode.h" + #include "mgmtAcct.h" #include "mgmtChildTable.h" #include "mgmtDb.h" @@ -34,6 +26,10 @@ #include "mgmtTable.h" #include "mgmtUser.h" #include "mgmtVgroup.h" +#include "mnode.h" + +#include "name.h" +#include "tsqlfunction.h" static void *tsSuperTableSdb; static int32_t tsSuperTableUpdateSize; @@ -47,7 +43,7 @@ static void *mgmtSuperTableActionDecode(void *row, char *str, int32_t size, int3 static void *mgmtSuperTableActionReset(void *row, char *str, int32_t size, int32_t *ssize); static void *mgmtSuperTableActionDestroy(void *row, char *str, int32_t size, int32_t *ssize); static int32_t mgmtRetrieveShowSuperTables(SShowObj *pShow, char *data, int32_t rows, void *pConn); -static int32_t mgmtGetShowSuperTableMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtGetShowSuperTableMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static void mgmtDestroySuperTable(SSuperTableObj *pTable) { free(pTable->schema); @@ -165,7 +161,7 @@ int32_t mgmtInitSuperTables() { mgmtSuperTableActionInit(); tsSuperTableSdb = sdbOpenTable(tsMaxTables, tsSuperTableUpdateSize + sizeof(SSchema) * TSDB_MAX_COLUMNS, - "stables", SDB_KEYTYPE_STRING, tsMgmtDirectory, mgmtSuperTableAction); + "stables", SDB_KEYTYPE_STRING, tsMnodeDir, mgmtSuperTableAction); if (tsSuperTableSdb == NULL) { mError("failed to init stables data"); return -1; @@ -200,7 +196,7 @@ void mgmtCleanUpSuperTables() { sdbCloseTable(tsSuperTableSdb); } -int32_t mgmtCreateSuperTable(SDbObj *pDb, SCreateTableMsg *pCreate) { +int32_t mgmtCreateSuperTable(SCMCreateTableMsg *pCreate) { int32_t numOfTables = sdbGetNumOfRows(tsSuperTableSdb); if (numOfTables >= 
TSDB_MAX_SUPER_TABLES) { mError("stable:%s, numOfTables:%d exceed maxTables:%d", pCreate->tableId, numOfTables, TSDB_MAX_SUPER_TABLES); @@ -213,14 +209,14 @@ int32_t mgmtCreateSuperTable(SDbObj *pDb, SCreateTableMsg *pCreate) { } strcpy(pStable->tableId, pCreate->tableId); - pStable->type = TSDB_TABLE_TYPE_SUPER_TABLE; - pStable->createdTime = taosGetTimestampMs(); - pStable->vgId = 0; - pStable->sid = 0; - pStable->uid = (((uint64_t)pStable->createdTime) << 16) + ((uint64_t)sdbGetVersion() & ((1ul << 16) - 1ul)); - pStable->sversion = 0; - pStable->numOfColumns = pCreate->numOfColumns; - pStable->numOfTags = pCreate->numOfTags; + pStable->type = TSDB_SUPER_TABLE; + pStable->createdTime = taosGetTimestampMs(); + pStable->vgId = 0; + pStable->sid = 0; + pStable->uid = (((uint64_t) pStable->createdTime) << 16) + ((uint64_t) sdbGetVersion() & ((1ul << 16) - 1ul)); + pStable->sversion = 0; + pStable->numOfColumns = htons(pCreate->numOfColumns); + pStable->numOfTags = htons(pCreate->numOfTags); int32_t numOfCols = pCreate->numOfColumns + pCreate->numOfTags; int32_t schemaSize = numOfCols * sizeof(SSchema); @@ -233,24 +229,31 @@ int32_t mgmtCreateSuperTable(SDbObj *pDb, SCreateTableMsg *pCreate) { memcpy(pStable->schema, pCreate->schema, numOfCols * sizeof(SSchema)); pStable->nextColId = 0; - for (int32_t col = 0; col < pCreate->numOfColumns; col++) { - SSchema *tschema = (SSchema *)pStable->schema; + for (int32_t col = 0; col < numOfCols; col++) { + SSchema *tschema = pStable->schema; tschema[col].colId = pStable->nextColId++; + tschema[col].bytes = htons(tschema[col].bytes); } if (sdbInsertRow(tsSuperTableSdb, pStable, 0) < 0) { - mError("table:%s, update sdb error", pCreate->tableId); + mError("stable:%s, update sdb error", pStable->tableId); return TSDB_CODE_SDB_ERROR; } + mPrint("stable:%s, is created, tags:%d cols:%d", pStable->tableId, pStable->numOfTags, pStable->numOfColumns); return TSDB_CODE_SUCCESS; } -int32_t mgmtDropSuperTable(SDbObj *pDb, SSuperTableObj 
*pSuperTable) { - //TODO drop all child tables - - mgmtRemoveSuperTableFromDb(pDb); - return sdbDeleteRow(tsSuperTableSdb, pSuperTable); +int32_t mgmtDropSuperTable(SQueuedMsg *newMsg, SDbObj *pDb, SSuperTableObj *pStable) { + if (pStable->numOfTables != 0) { + mError("stable:%s, numOfTables:%d not 0", pStable->tableId, pStable->numOfTables); + return TSDB_CODE_OTHERS; + } else { + //TODO: drop child tables + mError("stable:%s, is dropped from sdb", pStable->tableId); + mgmtRemoveSuperTableFromDb(pDb); + return TSDB_CODE_OTHERS; + } } void* mgmtGetSuperTable(char *tableId) { @@ -259,7 +262,7 @@ void* mgmtGetSuperTable(char *tableId) { void *mgmtGetSuperTableVgroup(SSuperTableObj *pStable) { //TODO get vgroup of dnodes - SSuperTableInfoRsp *rsp = rpcMallocCont(sizeof(SSuperTableInfoRsp) + sizeof(uint32_t) * mgmtGetDnodesNum()); + SCMSuperTableInfoRsp *rsp = rpcMallocCont(sizeof(SCMSuperTableInfoRsp) + sizeof(uint32_t) * mgmtGetDnodesNum()); rsp->numOfDnodes = 1; rsp->dnodeIps[0] = 0; return rsp; @@ -483,14 +486,14 @@ int32_t mgmtDropSuperTableColumnByName(SSuperTableObj *pStable, char *colName) { return TSDB_CODE_SUCCESS; } -static int32_t mgmtGetShowSuperTableMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +static int32_t mgmtGetShowSuperTableMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { SDbObj *pDb = mgmtGetDb(pShow->db); if (pDb == NULL) { return TSDB_CODE_DB_NOT_SELECTED; } int32_t cols = 0; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = TSDB_TABLE_NAME_LEN; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; @@ -500,7 +503,7 @@ static int32_t mgmtGetShowSuperTableMeta(STableMeta *pMeta, SShowObj *pShow, voi pShow->bytes[cols] = 8; pSchema[cols].type = TSDB_DATA_TYPE_TIMESTAMP; - strcpy(pSchema[cols].name, "created_time"); + strcpy(pSchema[cols].name, "create_time"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; @@ -603,6 +606,30 @@ int32_t mgmtRetrieveShowSuperTables(SShowObj *pShow, 
char *data, int32_t rows, v return numOfRows; } +void mgmtDropAllSuperTables(SDbObj *pDropDb) { + void *pNode = NULL; + void *pLastNode = NULL; + int32_t numOfTables = 0; + int32_t dbNameLen = strlen(pDropDb->name); + SSuperTableObj *pTable = NULL; + + while (1) { + pNode = sdbFetchRow(tsSuperTableSdb, pNode, (void **)&pTable); + if (pTable == NULL) { + break; + } + + if (strncmp(pDropDb->name, pTable->tableId, dbNameLen) == 0) { + sdbDeleteRow(tsSuperTableSdb, pTable); + pNode = pLastNode; + numOfTables ++; + continue; + } + } + + mTrace("db:%s, all super tables:%d is dropped", pDropDb->name, numOfTables); +} + void mgmtAddTableIntoSuperTable(SSuperTableObj *pStable) { pStable->numOfTables++; } @@ -624,16 +651,16 @@ int32_t mgmtSetSchemaFromSuperTable(SSchema *pSchema, SSuperTableObj *pTable) { return (pTable->numOfColumns + pTable->numOfTags) * sizeof(SSchema); } -int32_t mgmtGetSuperTableMeta(SDbObj *pDb, SSuperTableObj *pTable, STableMeta *pMeta, bool usePublicIp) { +int32_t mgmtGetSuperTableMeta(SDbObj *pDb, SSuperTableObj *pTable, STableMetaMsg *pMeta, bool usePublicIp) { pMeta->uid = htobe64(pTable->uid); pMeta->sid = htonl(pTable->sid); - pMeta->vgid = htonl(pTable->vgId); + pMeta->vgId = htonl(pTable->vgId); pMeta->sversion = htons(pTable->sversion); pMeta->precision = pDb->cfg.precision; pMeta->numOfTags = pTable->numOfTags; pMeta->numOfColumns = htons(pTable->numOfColumns); pMeta->tableType = pTable->type; - pMeta->contLen = sizeof(STableMeta) + mgmtSetSchemaFromSuperTable(pMeta->schema, pTable); + pMeta->contLen = sizeof(STableMetaMsg) + mgmtSetSchemaFromSuperTable(pMeta->schema, pTable); strcpy(pMeta->tableId, pTable->tableId); return TSDB_CODE_SUCCESS; diff --git a/src/mnode/src/mgmtTable.c b/src/mnode/src/mgmtTable.c index c4bbd9b3f2c415b5f87824333bc92e042125669a..2fe6e31306ecb49e81f31b6fab8c3d93843ccab5 100644 --- a/src/mnode/src/mgmtTable.c +++ b/src/mnode/src/mgmtTable.c @@ -15,21 +15,12 @@ #define _DEFAULT_SOURCE #include "os.h" -#include 
"taoserror.h" -#include "taosmsg.h" -#include "tast.h" -#include "textbuffer.h" -#include "tschemautil.h" -#include "tscompression.h" -#include "tskiplist.h" -#include "tsqlfunction.h" -#include "tstatus.h" -#include "ttime.h" -#include "mnode.h" + +#include "mgmtTable.h" #include "mgmtAcct.h" #include "mgmtChildTable.h" -#include "mgmtDb.h" #include "mgmtDClient.h" +#include "mgmtDb.h" #include "mgmtDnode.h" #include "mgmtGrant.h" #include "mgmtMnode.h" @@ -37,21 +28,38 @@ #include "mgmtProfile.h" #include "mgmtShell.h" #include "mgmtSuperTable.h" -#include "mgmtTable.h" #include "mgmtUser.h" #include "mgmtVgroup.h" +#include "mnode.h" + +#include "qast.h" +#include "qextbuffer.h" +#include "taoserror.h" +#include "taosmsg.h" +#include "tscompression.h" +#include "tskiplist.h" +#include "tsqlfunction.h" +#include "tstatus.h" +#include "ttime.h" +#include "name.h" extern void *tsNormalTableSdb; extern void *tsChildTableSdb; -static void mgmtProcessCreateTableMsg(SRpcMsg *rpcMsg); -static void mgmtProcessDropTableMsg(SRpcMsg *rpcMsg); -static void mgmtProcessAlterTableMsg(SRpcMsg *rpcMsg); -static void mgmtProcessTableMetaMsg(SRpcMsg *rpcMsg); -static void mgmtProcessMultiTableMetaMsg(SRpcMsg *rpcMsg); -static void mgmtProcessSuperTableMetaMsg(SRpcMsg *rpcMsg); -static int32_t mgmtGetShowTableMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static void mgmtProcessCreateTableMsg(SQueuedMsg *queueMsg); +static void mgmtProcessDropTableMsg(SQueuedMsg *queueMsg); +static void mgmtProcessAlterTableMsg(SQueuedMsg *queueMsg); +static void mgmtProcessTableMetaMsg(SQueuedMsg *queueMsg); +static void mgmtProcessMultiTableMetaMsg(SQueuedMsg *queueMsg); +static void mgmtProcessSuperTableMetaMsg(SQueuedMsg *queueMsg); +static void mgmtProcessCreateTableRsp(SRpcMsg *rpcMsg); +static void mgmtProcessDropTableRsp(SRpcMsg *rpcMsg); +static int32_t mgmtGetShowTableMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); +static void mgmtProcessAlterTableRsp(SRpcMsg *rpcMsg); 
+static void mgmtProcessDropStableRsp(SRpcMsg *rpcMsg); +static int32_t mgmtGetShowTableMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mgmtRetrieveShowTables(SShowObj *pShow, char *data, int32_t rows, void *pConn); +static void mgmtProcessGetTableMeta(STableInfo *pTable, void *thandle); int32_t mgmtInitTables() { int32_t code = mgmtInitSuperTables(); @@ -71,14 +79,18 @@ int32_t mgmtInitTables() { mgmtSetVgroupIdPool(); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CREATE_TABLE, mgmtProcessCreateTableMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_DROP_TABLE, mgmtProcessDropTableMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_ALTER_TABLE, mgmtProcessAlterTableMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_TABLE_META, mgmtProcessTableMetaMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_MULTI_TABLE_META, mgmtProcessMultiTableMetaMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_STABLE_META, mgmtProcessSuperTableMetaMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_CREATE_TABLE, mgmtProcessCreateTableMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_DROP_TABLE, mgmtProcessDropTableMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_ALTER_TABLE, mgmtProcessAlterTableMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_TABLE_META, mgmtProcessTableMetaMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_TABLES_META, mgmtProcessMultiTableMetaMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_STABLE_META, mgmtProcessSuperTableMetaMsg); mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_TABLE, mgmtGetShowTableMeta); mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_TABLE, mgmtRetrieveShowTables); + mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_CREATE_TABLE_RSP, mgmtProcessCreateTableRsp); + mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_DROP_TABLE_RSP, mgmtProcessDropTableRsp); + mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_ALTER_TABLE_RSP, mgmtProcessAlterTableRsp); + mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_DROP_STABLE_RSP, mgmtProcessDropStableRsp); return TSDB_CODE_SUCCESS; } @@ -102,25 +114,23 @@ STableInfo* mgmtGetTable(char 
*tableId) { return NULL; } -STableInfo* mgmtGetTableByPos(uint32_t dnodeIp, int32_t vnode, int32_t sid) { - SDnodeObj *pObj = mgmtGetDnode(dnodeIp); - if (pObj != NULL && vnode >= 0 && vnode < pObj->numOfVnodes) { - int32_t vgId = pObj->vload[vnode].vgId; - SVgObj *pVgroup = mgmtGetVgroup(vgId); - if (pVgroup) { - return pVgroup->tableList[sid]; - } +STableInfo* mgmtGetTableByPos(uint32_t dnodeId, int32_t vnode, int32_t sid) { + SDnodeObj *pObj = mgmtGetDnode(dnodeId); + SVgObj *pVgroup = mgmtGetVgroup(vnode); + + if (pObj == NULL || pVgroup == NULL) { + return NULL; } - return NULL; + return pVgroup->tableList[sid]; } -int32_t mgmtGetTableMeta(SDbObj *pDb, STableInfo *pTable, STableMeta *pMeta, bool usePublicIp) { - if (pTable->type == TSDB_TABLE_TYPE_CHILD_TABLE) { +int32_t mgmtGetTableMeta(SDbObj *pDb, STableInfo *pTable, STableMetaMsg *pMeta, bool usePublicIp) { + if (pTable->type == TSDB_CHILD_TABLE) { mgmtGetChildTableMeta(pDb, (SChildTableObj *) pTable, pMeta, usePublicIp); - } else if (pTable->type == TSDB_TABLE_TYPE_NORMAL_TABLE) { + } else if (pTable->type == TSDB_NORMAL_TABLE) { mgmtGetNormalTableMeta(pDb, (SNormalTableObj *) pTable, pMeta, usePublicIp); - } else if (pTable->type == TSDB_TABLE_TYPE_SUPER_TABLE) { + } else if (pTable->type == TSDB_SUPER_TABLE) { mgmtGetSuperTableMeta(pDb, (SSuperTableObj *) pTable, pMeta, usePublicIp); } else { mTrace("%s, uid:%" PRIu64 " table meta retrieve failed, invalid type", pTable->tableId, pTable->uid); @@ -131,207 +141,7 @@ int32_t mgmtGetTableMeta(SDbObj *pDb, STableInfo *pTable, STableMeta *pMeta, boo return TSDB_CODE_SUCCESS; } - - -void mgmtProcessCreateVgroup(SCreateTableMsg *pCreate, int32_t contLen, void *thandle, bool isGetMeta) { - SRpcMsg rpcRsp = {.handle = thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - SDbObj *pDb = mgmtGetDb(pCreate->db); - if (pDb == NULL) { - mError("table:%s, failed to create vgroup, db not found", pCreate->tableId); - rpcRsp.code = TSDB_CODE_INVALID_DB; - 
rpcSendResponse(&rpcRsp); - return; - } - - SVgObj *pVgroup = mgmtCreateVgroup(pDb); - if (pVgroup == NULL) { - mError("table:%s, failed to alloc vnode to vgroup", pCreate->tableId); - rpcRsp.code = TSDB_CODE_NO_ENOUGH_DNODES; - rpcSendResponse(&rpcRsp); - return; - } - - void *cont = rpcMallocCont(contLen); - if (cont == NULL) { - mError("table:%s, failed to create table, can not alloc memory", pCreate->tableId); - rpcRsp.code = TSDB_CODE_SERV_OUT_OF_MEMORY; - rpcSendResponse(&rpcRsp); - return; - } - - memcpy(cont, pCreate, contLen); - - SProcessInfo *info = calloc(1, sizeof(SProcessInfo)); - info->type = TSDB_PROCESS_CREATE_VGROUP; - info->thandle = thandle; - info->ahandle = pVgroup; - info->cont = cont; - info->contLen = contLen; - - if (isGetMeta) { - info->type = TSDB_PROCESS_CREATE_VGROUP_GET_META; - } - - mgmtSendCreateVgroupMsg(pVgroup, info); -} - -//void mgmtSendCreateTableMsg(SDMCreateTableMsg *pCreate, SRpcIpSet *ipSet, void *ahandle) { -// mTrace("table:%s, send create table msg, ahandle:%p", pCreate->tableId, ahandle); -// SRpcMsg rpcMsg = { -// .handle = ahandle, -// .pCont = pCreate, -// .contLen = htonl(pCreate->contLen), -// .code = 0, -// .msgType = TSDB_MSG_TYPE_MD_CREATE_TABLE -// }; -// rpcSendRequest(tsMgmtDClientRpc, ipSet, &rpcMsg); -//} -// - - -void mgmtProcessCreateTable(SVgObj *pVgroup, SCreateTableMsg *pCreate, int32_t contLen, void *thandle, bool isGetMeta) { - assert(pVgroup != NULL); - SRpcMsg rpcRsp = {.handle = thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - - int32_t sid = taosAllocateId(pVgroup->idPool); - if (sid < 0) { - mTrace("table:%s, no enough sid in vgroup:%d, start to create a new vgroup", pCreate->tableId, pVgroup->vgId); - mgmtProcessCreateVgroup(pCreate, contLen, thandle, isGetMeta); - return; - } - - STableInfo *pTable; - SDMCreateTableMsg *pDCreate = NULL; - - if (pCreate->numOfColumns == 0) { - mTrace("table:%s, start to create child table, vgroup:%d sid:%d", pCreate->tableId, pVgroup->vgId, 
sid); - rpcRsp.code = mgmtCreateChildTable(pCreate, contLen, pVgroup, sid, &pDCreate, &pTable); - } else { - mTrace("table:%s, start to create normal table, vgroup:%d sid:%d", pCreate->tableId, pVgroup->vgId, sid); - rpcRsp.code = mgmtCreateNormalTable(pCreate, contLen, pVgroup, sid, &pDCreate, &pTable); - } - - if (rpcRsp.code != TSDB_CODE_SUCCESS) { - mTrace("table:%s, failed to create table in vgroup:%d sid:%d ", pCreate->tableId, pVgroup->vgId, sid); - rpcSendResponse(&rpcRsp); - return; - } - - assert(pDCreate != NULL); - assert(pTable != NULL); - - SProcessInfo *info = calloc(1, sizeof(SProcessInfo)); - info->type = TSDB_PROCESS_CREATE_TABLE; - info->thandle = thandle; - info->ahandle = pTable; - SRpcIpSet ipSet = mgmtGetIpSetFromVgroup(pVgroup); - if (isGetMeta) { - info->type = TSDB_PROCESS_CREATE_TABLE_GET_META; - } - - SRpcMsg rpcMsg = { - .handle = info, - .pCont = pCreate, - .contLen = htonl(pDCreate->contLen), - .code = 0, - .msgType = TSDB_MSG_TYPE_MD_CREATE_TABLE - }; - mgmtSendMsgToDnode(&ipSet, &rpcMsg); -} - -int32_t mgmtCreateTable(SCreateTableMsg *pCreate, int32_t contLen, void *thandle, bool isGetMeta) { - SDbObj *pDb = mgmtGetDb(pCreate->db); - if (pDb == NULL) { - mError("table:%s, failed to create table, db not selected", pCreate->tableId); - return TSDB_CODE_DB_NOT_SELECTED; - } - - STableInfo *pTable = mgmtGetTable(pCreate->tableId); - if (pTable != NULL) { - if (pCreate->igExists) { - mTrace("table:%s, table is already exist, think it success", pCreate->tableId); - return TSDB_CODE_SUCCESS; - } else { - mError("table:%s, failed to create table, table already exist", pCreate->tableId); - return TSDB_CODE_TABLE_ALREADY_EXIST; - } - } - - SAcctObj *pAcct = mgmtGetAcct(pDb->cfg.acct); - assert(pAcct != NULL); - - int32_t code = mgmtCheckTableLimit(pAcct, pCreate->numOfColumns); - if (code != TSDB_CODE_SUCCESS) { - mError("table:%s, failed to create table, table num exceed the limit", pCreate->tableId); - return code; - } - - if 
(mgmtCheckExpired()) { - mError("table:%s, failed to create table, grant expired", pCreate->tableId); - return TSDB_CODE_GRANT_EXPIRED; - } - - if (pCreate->numOfTags != 0) { - mTrace("table:%s, start to create super table, tags:%d columns:%d", - pCreate->tableId, pCreate->numOfTags, pCreate->numOfColumns); - return mgmtCreateSuperTable(pDb, pCreate); - } - - code = mgmtCheckTimeSeries(pCreate->numOfColumns); - if (code != TSDB_CODE_SUCCESS) { - mError("table:%s, failed to create table, timeseries exceed the limit", pCreate->tableId); - return TSDB_CODE_SUCCESS; - } - - SVgObj *pVgroup = mgmtGetAvailableVgroup(pDb); - if (pVgroup == NULL) { - mTrace("table:%s, no avaliable vgroup, start to create a new one", pCreate->tableId); - mgmtProcessCreateVgroup(pCreate, contLen, thandle, isGetMeta); - } else { - mTrace("table:%s, try to create table in vgroup:%d", pCreate->tableId, pVgroup->vgId); - mgmtProcessCreateTable(pVgroup, pCreate, contLen, thandle, isGetMeta); - } - - return TSDB_CODE_ACTION_IN_PROGRESS; -} - -int32_t mgmtDropTable(SDbObj *pDb, char *tableId, int32_t ignore) { - STableInfo *pTable = mgmtGetTable(tableId); - if (pTable == NULL) { - if (ignore) { - mTrace("table:%s, table is not exist, think it success", tableId); - return TSDB_CODE_SUCCESS; - } else { - mError("table:%s, failed to create table, table not exist", tableId); - return TSDB_CODE_INVALID_TABLE; - } - } - - if (mgmtCheckIsMonitorDB(pDb->name, tsMonitorDbName)) { - mError("table:%s, failed to create table, in monitor database", tableId); - return TSDB_CODE_MONITOR_DB_FORBIDDEN; - } - - switch (pTable->type) { - case TSDB_TABLE_TYPE_SUPER_TABLE: - mTrace("table:%s, start to drop super table", tableId); - return mgmtDropSuperTable(pDb, (SSuperTableObj *) pTable); - case TSDB_TABLE_TYPE_CHILD_TABLE: - mTrace("table:%s, start to drop child table", tableId); - return mgmtDropChildTable(pDb, (SChildTableObj *) pTable); - case TSDB_TABLE_TYPE_NORMAL_TABLE: - mTrace("table:%s, start to drop normal 
table", tableId); - return mgmtDropNormalTable(pDb, (SNormalTableObj *) pTable); - case TSDB_TABLE_TYPE_STREAM_TABLE: - mTrace("table:%s, start to drop stream table", tableId); - return mgmtDropNormalTable(pDb, (SNormalTableObj *) pTable); - default: - mError("table:%s, invalid table type:%d", tableId, pTable->type); - return TSDB_CODE_INVALID_TABLE; - } -} - -int32_t mgmtAlterTable(SDbObj *pDb, SAlterTableMsg *pAlter) { +int32_t mgmtAlterTable(SDbObj *pDb, SCMAlterTableMsg *pAlter) { STableInfo *pTable = mgmtGetTable(pAlter->tableId); if (pTable == NULL) { return TSDB_CODE_INVALID_TABLE; @@ -342,31 +152,31 @@ int32_t mgmtAlterTable(SDbObj *pDb, SAlterTableMsg *pAlter) { } if (pAlter->type == TSDB_ALTER_TABLE_ADD_TAG_COLUMN) { - if (pTable->type == TSDB_TABLE_TYPE_SUPER_TABLE) { + if (pTable->type == TSDB_SUPER_TABLE) { return mgmtAddSuperTableTag((SSuperTableObj *) pTable, pAlter->schema, 1); } } else if (pAlter->type == TSDB_ALTER_TABLE_DROP_TAG_COLUMN) { - if (pTable->type == TSDB_TABLE_TYPE_SUPER_TABLE) { + if (pTable->type == TSDB_SUPER_TABLE) { return mgmtDropSuperTableTag((SSuperTableObj *) pTable, pAlter->schema[0].name); } } else if (pAlter->type == TSDB_ALTER_TABLE_CHANGE_TAG_COLUMN) { - if (pTable->type == TSDB_TABLE_TYPE_SUPER_TABLE) { + if (pTable->type == TSDB_SUPER_TABLE) { return mgmtModifySuperTableTagNameByName((SSuperTableObj *) pTable, pAlter->schema[0].name, pAlter->schema[1].name); } } else if (pAlter->type == TSDB_ALTER_TABLE_UPDATE_TAG_VAL) { - if (pTable->type == TSDB_TABLE_TYPE_CHILD_TABLE) { + if (pTable->type == TSDB_CHILD_TABLE) { return mgmtModifyChildTableTagValueByName((SChildTableObj *) pTable, pAlter->schema[0].name, pAlter->tagVal); } } else if (pAlter->type == TSDB_ALTER_TABLE_ADD_COLUMN) { - if (pTable->type == TSDB_TABLE_TYPE_NORMAL_TABLE) { + if (pTable->type == TSDB_NORMAL_TABLE) { return mgmtAddNormalTableColumn((SNormalTableObj *) pTable, pAlter->schema, 1); - } else if (pTable->type == TSDB_TABLE_TYPE_SUPER_TABLE) { + } 
else if (pTable->type == TSDB_SUPER_TABLE) { return mgmtAddSuperTableColumn((SSuperTableObj *) pTable, pAlter->schema, 1); } else {} } else if (pAlter->type == TSDB_ALTER_TABLE_DROP_COLUMN) { - if (pTable->type == TSDB_TABLE_TYPE_NORMAL_TABLE) { + if (pTable->type == TSDB_NORMAL_TABLE) { return mgmtDropNormalTableColumnByName((SNormalTableObj *) pTable, pAlter->schema[0].name); - } else if (pTable->type == TSDB_TABLE_TYPE_SUPER_TABLE) { + } else if (pTable->type == TSDB_SUPER_TABLE) { return mgmtDropSuperTableColumnByName((SSuperTableObj *) pTable, pAlter->schema[0].name); } else {} } else {} @@ -380,14 +190,14 @@ void mgmtCleanUpTables() { mgmtCleanUpSuperTables(); } -int32_t mgmtGetShowTableMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +int32_t mgmtGetShowTableMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { SDbObj *pDb = mgmtGetDb(pShow->db); if (pDb == NULL) { return TSDB_CODE_DB_NOT_SELECTED; } int32_t cols = 0; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = TSDB_TABLE_NAME_LEN; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; @@ -409,7 +219,7 @@ int32_t mgmtGetShowTableMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { pShow->bytes[cols] = TSDB_TABLE_NAME_LEN; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; - strcpy(pSchema[cols].name, "stable"); + strcpy(pSchema[cols].name, "stable_name"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; @@ -539,122 +349,224 @@ int32_t mgmtRetrieveShowTables(SShowObj *pShow, char *data, int32_t rows, void * return numOfRows; } -SMDDropTableMsg *mgmtBuildRemoveTableMsg(STableInfo *pTable) { - SMDDropTableMsg *pRemove = NULL; +void mgmtProcessCreateTableMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; + SCMCreateTableMsg *pCreate = pMsg->pCont; + mTrace("table:%s, create msg is received from thandle:%p", pCreate->tableId, pMsg->thandle); - return pRemove; -} + if (mgmtCheckExpired()) { + mError("table:%s, failed to create, grant 
expired", pCreate->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_GRANT_EXPIRED); + return; + } -void mgmtSetTableDirty(STableInfo *pTable, bool isDirty) { - pTable->dirty = isDirty; -} + if (!pMsg->pUser->writeAuth) { + mError("table:%s, failed to create, no rights", pCreate->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_NO_RIGHTS); + return; + } -void mgmtProcessCreateTableMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; + SAcctObj *pAcct = pMsg->pUser->pAcct; + int32_t code = mgmtCheckTableLimit(pAcct, htons(pCreate->numOfColumns)); + if (code != TSDB_CODE_SUCCESS) { + mError("table:%s, failed to create, exceed the limit", pCreate->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_NO_RIGHTS); + return; + } - SCreateTableMsg *pCreate = (SCreateTableMsg *) rpcMsg->pCont; - pCreate->numOfColumns = htons(pCreate->numOfColumns); - pCreate->numOfTags = htons(pCreate->numOfTags); - pCreate->sqlLen = htons(pCreate->sqlLen); + pMsg->pDb = mgmtGetDb(pCreate->db); + if (pMsg->pDb == NULL) { + mError("table:%s, failed to create, db not selected", pCreate->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_DB_NOT_SELECTED); + return; + } - SSchema *pSchema = (SSchema*) pCreate->schema; - for (int32_t i = 0; i < pCreate->numOfColumns + pCreate->numOfTags; ++i) { - pSchema->bytes = htons(pSchema->bytes); - pSchema->colId = i; - pSchema++; + STableInfo *pTable = mgmtGetTable(pCreate->tableId); + if (pTable != NULL) { + if (pCreate->igExists) { + mTrace("table:%s is already exist", pCreate->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_SUCCESS); + return; + } else { + mError("table:%s, failed to create, table already exist", pCreate->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_TABLE_ALREADY_EXIST); + return; + } } - if (mgmtCheckRedirect(rpcMsg->handle) != TSDB_CODE_SUCCESS) { - mError("table:%s, failed to create table, need redirect message", 
pCreate->tableId); + if (pCreate->numOfTags != 0) { + mTrace("table:%s, is a super table", pCreate->tableId); + code = mgmtCreateSuperTable(pCreate); + mgmtSendSimpleResp(pMsg->thandle, code); return; } - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pUser == NULL) { - mError("table:%s, failed to create table, invalid user", pCreate->tableId); - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); + code = mgmtCheckTimeSeries(pCreate->numOfColumns); + if (code != TSDB_CODE_SUCCESS) { + mError("table:%s, failed to create, timeseries exceed the limit", pCreate->tableId); + mgmtSendSimpleResp(pMsg->thandle, code); return; } - if (!pUser->writeAuth) { - mError("table:%s, failed to create table, no rights", pCreate->tableId); - rpcRsp.code = TSDB_CODE_NO_RIGHTS; - rpcSendResponse(&rpcRsp); + SQueuedMsg *newMsg = malloc(sizeof(SQueuedMsg)); + memcpy(newMsg, pMsg, sizeof(SQueuedMsg)); + pMsg->pCont = NULL; + + SVgObj *pVgroup = mgmtGetAvailableVgroup(pMsg->pDb); + if (pVgroup == NULL) { + mTrace("table:%s, start to create a new vgroup", pCreate->tableId); + mgmtCreateVgroup(newMsg); return; } - int32_t code = mgmtCreateTable(pCreate, rpcMsg->contLen, rpcMsg->handle, false); - if (code != TSDB_CODE_ACTION_IN_PROGRESS) { - rpcRsp.code = code; - rpcSendResponse(&rpcRsp); + int32_t sid = taosAllocateId(pVgroup->idPool); + if (sid < 0) { + mTrace("tables:%s, no enough sid in vgroup:%d", pVgroup->vgId); + mgmtCreateVgroup(newMsg); + return; + } + + SMDCreateTableMsg *pMDCreate = NULL; + if (pCreate->numOfColumns == 0) { + mTrace("table:%s, is a child table, vgroup:%d sid:%d ahandle:%p", pCreate->tableId, pVgroup->vgId, sid, pMsg); + pTable = mgmtCreateChildTable(pCreate, pVgroup, sid); + if (pTable == NULL) { + mgmtSendSimpleResp(pMsg->thandle, terrno); + return; + } + pMDCreate = mgmtBuildCreateChildTableMsg(pCreate, (SChildTableObj *) pTable); + if (pMDCreate == NULL) { + mgmtSendSimpleResp(pMsg->thandle, terrno); + return; + } + } else { + 
mTrace("table:%s, is a normal table, vgroup:%d sid:%d ahandle:%p", pCreate->tableId, pVgroup->vgId, sid, pMsg); + pTable = mgmtCreateNormalTable(pCreate, pVgroup, sid); + if (pTable == NULL) { + mgmtSendSimpleResp(pMsg->thandle, terrno); + return; + } + pMDCreate = mgmtBuildCreateNormalTableMsg((SNormalTableObj *) pTable); + if (pMDCreate == NULL) { + mgmtSendSimpleResp(pMsg->thandle, terrno); + return; + } } + + SRpcIpSet ipSet = mgmtGetIpSetFromVgroup(pVgroup); + SRpcMsg rpcMsg = { + .handle = newMsg, + .pCont = pMDCreate, + .contLen = htonl(pMDCreate->contLen), + .code = 0, + .msgType = TSDB_MSG_TYPE_MD_CREATE_TABLE + }; + + newMsg->ahandle = pTable; + mgmtSendMsgToDnode(&ipSet, &rpcMsg); } -void mgmtProcessDropTableMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - SDropTableMsg *pDrop = (SDropTableMsg *) rpcMsg->pCont; +void mgmtProcessDropTableMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; - if (mgmtCheckRedirect(rpcMsg->handle) != TSDB_CODE_SUCCESS) { - mError("table:%s, failed to drop table, need redirect message", pDrop->tableId); - return; - } + SCMDropTableMsg *pDrop = pMsg->pCont; + mTrace("table:%s, drop table msg is received from thandle:%p", pDrop->tableId, pMsg->thandle); - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pUser == NULL) { - mError("table:%s, failed to drop table, invalid user", pDrop->tableId); - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); + if (mgmtCheckExpired()) { + mError("table:%s, failed to drop, grant expired", pDrop->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_GRANT_EXPIRED); return; } - if (!pUser->writeAuth) { - mError("table:%s, failed to drop table, no rights", pDrop->tableId); - rpcRsp.code = TSDB_CODE_NO_RIGHTS; - rpcSendResponse(&rpcRsp); + if (!pMsg->pUser->writeAuth) { + mError("table:%s, failed to drop, no rights", pDrop->tableId); + mgmtSendSimpleResp(pMsg->thandle, 
TSDB_CODE_NO_RIGHTS); return; } SDbObj *pDb = mgmtGetDbByTableId(pDrop->tableId); if (pDb == NULL) { mError("table:%s, failed to drop table, db not selected", pDrop->tableId); - rpcRsp.code = TSDB_CODE_DB_NOT_SELECTED; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_DB_NOT_SELECTED); return; } - int32_t code = mgmtDropTable(pDb, pDrop->tableId, pDrop->igNotExists); - if (code != TSDB_CODE_ACTION_IN_PROGRESS) { - rpcRsp.code = code; - rpcSendResponse(&rpcRsp); + STableInfo *pTable = mgmtGetTable(pDrop->tableId); + if (pTable == NULL) { + if (pDrop->igNotExists) { + mTrace("table:%s, table is not exist, think drop success", pDrop->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_SUCCESS); + return; + } else { + mError("table:%s, failed to drop table, table not exist", pDrop->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_TABLE); + return; + } + } + + if (mgmtCheckIsMonitorDB(pDb->name, tsMonitorDbName)) { + mError("table:%s, failed to drop table, in monitor database", pDrop->tableId); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_MONITOR_DB_FORBIDDEN); + return; + } + + SQueuedMsg *newMsg = malloc(sizeof(SQueuedMsg)); + memcpy(newMsg, pMsg, sizeof(SQueuedMsg)); + pMsg->pCont = NULL; + int32_t code; + + switch (pTable->type) { + case TSDB_SUPER_TABLE: + mTrace("table:%s, start to drop super table", pDrop->tableId); + code = mgmtDropSuperTable(newMsg, pDb, (SSuperTableObj *) pTable); + break; + case TSDB_CHILD_TABLE: + mTrace("table:%s, start to drop child table", pDrop->tableId); + code = mgmtDropChildTable(newMsg, (SChildTableObj *) pTable); + break; + case TSDB_NORMAL_TABLE: + mTrace("table:%s, start to drop normal table", pDrop->tableId); + code = mgmtDropNormalTable(newMsg, (SNormalTableObj *) pTable); + break; + case TSDB_STREAM_TABLE: + mTrace("table:%s, start to drop stream table", pDrop->tableId); + code = mgmtDropNormalTable(newMsg, (SNormalTableObj *) pTable); + break; + default: + code = 
TSDB_CODE_INVALID_TABLE_TYPE; + mError("table:%s, invalid table type:%d", pDrop->tableId, pTable->type); + } + + if (code != TSDB_CODE_SUCCESS) { + free(newMsg); + mgmtSendSimpleResp(pMsg->thandle, code); } } -void mgmtProcessAlterTableMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle) != TSDB_CODE_SUCCESS) { +void mgmtProcessAlterTableMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) { return; } - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); + SUserObj *pUser = mgmtGetUserFromConn(pMsg->thandle); if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_USER); return; } - SAlterTableMsg *pAlter = (SAlterTableMsg *) rpcMsg->pCont; + SCMAlterTableMsg *pAlter = pMsg->pCont; + int32_t code; if (!pUser->writeAuth) { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; + code = TSDB_CODE_NO_RIGHTS; } else { pAlter->type = htons(pAlter->type); pAlter->numOfCols = htons(pAlter->numOfCols); if (pAlter->numOfCols > 2) { mError("table:%s error numOfCols:%d in alter table", pAlter->tableId, pAlter->numOfCols); - rpcRsp.code = TSDB_CODE_APP_ERROR; + code = TSDB_CODE_APP_ERROR; } else { SDbObj *pDb = mgmtGetDb(pAlter->db); if (pDb) { @@ -662,23 +574,23 @@ void mgmtProcessAlterTableMsg(SRpcMsg *rpcMsg) { pAlter->schema[i].bytes = htons(pAlter->schema[i].bytes); } - rpcRsp.code = mgmtAlterTable(pDb, pAlter); - if (rpcRsp.code == 0) { + code = mgmtAlterTable(pDb, pAlter); + if (code == 0) { mLPrint("table:%s is altered by %s", pAlter->tableId, pUser->user); } } else { - rpcRsp.code = TSDB_CODE_DB_NOT_SELECTED; + code = TSDB_CODE_DB_NOT_SELECTED; } } } - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, code); } void mgmtProcessGetTableMeta(STableInfo *pTable, void *thandle) { SRpcMsg rpcRsp = {.handle = thandle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; 
SDbObj* pDb = mgmtGetDbByTableId(pTable->tableId); - if (pDb == NULL || pDb->dropStatus != TSDB_DB_STATUS_READY) { + if (pDb == NULL || pDb->dirty) { mError("table:%s, failed to get table meta, db not selected", pTable->tableId); rpcRsp.code = TSDB_CODE_DB_NOT_SELECTED; rpcSendResponse(&rpcRsp); @@ -686,38 +598,36 @@ void mgmtProcessGetTableMeta(STableInfo *pTable, void *thandle) { } SRpcConnInfo connInfo; - rpcGetConnInfo(thandle, &connInfo); - bool usePublicIp = (connInfo.serverIp == tsPublicIpInt); + if (rpcGetConnInfo(thandle, &connInfo) != 0) { + mError("conn:%p is already released while get table meta", thandle); + return; + } - STableMeta *pMeta = rpcMallocCont(sizeof(STableMeta) + sizeof(SSchema) * TSDB_MAX_COLUMNS); + bool usePublicIp = (connInfo.serverIp == tsPublicIpInt); + + STableMetaMsg *pMeta = rpcMallocCont(sizeof(STableMetaMsg) + sizeof(SSchema) * TSDB_MAX_COLUMNS); rpcRsp.code = mgmtGetTableMeta(pDb, pTable, pMeta, usePublicIp); if (rpcRsp.code != TSDB_CODE_SUCCESS) { rpcFreeCont(pMeta); } else { - pMeta->contLen = htons(pMeta->contLen); rpcRsp.pCont = pMeta; rpcRsp.contLen = pMeta->contLen; + + pMeta->contLen = htons(pMeta->contLen); } rpcSendResponse(&rpcRsp); } - -void mgmtProcessTableMetaMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp; - rpcRsp.handle = rpcMsg->handle; - rpcRsp.pCont = NULL; - rpcRsp.contLen = 0; - - STableInfoMsg *pInfo = rpcMsg->pCont; +void mgmtProcessTableMetaMsg(SQueuedMsg *pMsg) { + SCMTableInfoMsg *pInfo = pMsg->pCont; pInfo->createFlag = htons(pInfo->createFlag); - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); + SUserObj *pUser = mgmtGetUserFromConn(pMsg->thandle); if (pUser == NULL) { mError("table:%s, failed to get table meta, invalid user", pInfo->tableId); - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_USER); return; } @@ -725,22 +635,20 @@ void mgmtProcessTableMetaMsg(SRpcMsg *rpcMsg) { if (pTable == NULL) { if (pInfo->createFlag != 1) { 
mError("table:%s, failed to get table meta, table not exist", pInfo->tableId); - rpcRsp.code = TSDB_CODE_INVALID_TABLE; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_TABLE); return; } else { // on demand create table from super table if table does not exists - if (mgmtCheckRedirect(rpcMsg->handle) != TSDB_CODE_SUCCESS) { + if (mgmtCheckRedirect(pMsg->thandle)) { mError("table:%s, failed to create table while get meta info, need redirect message", pInfo->tableId); return; } - int32_t contLen = sizeof(SCreateTableMsg) + sizeof(STagData); - SCreateTableMsg *pCreateMsg = rpcMallocCont(contLen); + int32_t contLen = sizeof(SCMCreateTableMsg) + sizeof(STagData); + SCMCreateTableMsg *pCreateMsg = rpcMallocCont(contLen); if (pCreateMsg == NULL) { mError("table:%s, failed to create table while get meta info, no enough memory", pInfo->tableId); - rpcRsp.code = TSDB_CODE_SERV_OUT_OF_MEMORY; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_SERV_OUT_OF_MEMORY); return; } @@ -748,38 +656,34 @@ void mgmtProcessTableMetaMsg(SRpcMsg *rpcMsg) { strcpy(pCreateMsg->tableId, pInfo->tableId); mError("table:%s, start to create table while get meta info", pInfo->tableId); - mgmtCreateTable(pCreateMsg, contLen, rpcMsg->handle, true); +// mgmtCreateTable(pCreateMsg, contLen, pMsg->thandle, true); } } else { - mgmtProcessGetTableMeta(pTable, rpcMsg->handle); + mgmtProcessGetTableMeta(pTable, pMsg->thandle); } } -void mgmtProcessMultiTableMetaMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp; - rpcRsp.handle = rpcMsg->handle; - rpcRsp.pCont = NULL; - rpcRsp.contLen = 0; - +void mgmtProcessMultiTableMetaMsg(SQueuedMsg *pMsg) { SRpcConnInfo connInfo; - rpcGetConnInfo(rpcMsg->handle, &connInfo); + if (rpcGetConnInfo(pMsg->thandle, &connInfo) != 0) { + mError("conn:%p is already released while get mulit table meta", pMsg->thandle); + return; + } bool usePublicIp = (connInfo.serverIp == tsPublicIpInt); SUserObj *pUser = mgmtGetUser(connInfo.user); 
if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_USER); return; } - SMultiTableInfoMsg *pInfo = rpcMsg->pCont; + SCMMultiTableInfoMsg *pInfo = pMsg->pCont; pInfo->numOfTables = htonl(pInfo->numOfTables); int32_t totalMallocLen = 4*1024*1024; // first malloc 4 MB, subsequent reallocation as twice SMultiTableMeta *pMultiMeta = rpcMallocCont(totalMallocLen); if (pMultiMeta == NULL) { - rpcRsp.code = TSDB_CODE_SERV_OUT_OF_MEMORY; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_SERV_OUT_OF_MEMORY); return; } @@ -795,7 +699,7 @@ void mgmtProcessMultiTableMetaMsg(SRpcMsg *rpcMsg) { if (pDb == NULL) continue; int availLen = totalMallocLen - pMultiMeta->contLen; - if (availLen <= sizeof(STableMeta) + sizeof(SSchema) * TSDB_MAX_COLUMNS) { + if (availLen <= sizeof(STableMetaMsg) + sizeof(SSchema) * TSDB_MAX_COLUMNS) { //TODO realloc //totalMallocLen *= 2; //pMultiMeta = rpcReMalloc(pMultiMeta, totalMallocLen); @@ -808,7 +712,7 @@ void mgmtProcessMultiTableMetaMsg(SRpcMsg *rpcMsg) { //} } - STableMeta *pMeta = (STableMeta *)(pMultiMeta->metas + pMultiMeta->contLen); + STableMetaMsg *pMeta = (STableMetaMsg *)(pMultiMeta->metas + pMultiMeta->contLen); int32_t code = mgmtGetTableMeta(pDb, pTable, pMeta, usePublicIp); if (code == TSDB_CODE_SUCCESS) { pMultiMeta->numOfTables ++; @@ -816,29 +720,156 @@ void mgmtProcessMultiTableMetaMsg(SRpcMsg *rpcMsg) { } } + SRpcMsg rpcRsp = {0}; + rpcRsp.handle = pMsg->thandle; rpcRsp.pCont = pMultiMeta; rpcRsp.contLen = pMultiMeta->contLen; rpcSendResponse(&rpcRsp); } -void mgmtProcessSuperTableMetaMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - SSuperTableInfoMsg *pInfo = rpcMsg->pCont; +void mgmtProcessSuperTableMetaMsg(SQueuedMsg *pMsg) { + SCMSuperTableInfoMsg *pInfo = pMsg->pCont; STableInfo *pTable = mgmtGetSuperTable(pInfo->tableId); if (pTable 
== NULL) { - rpcRsp.code = TSDB_CODE_INVALID_TABLE; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_TABLE); return; } - SSuperTableInfoRsp *pRsp = mgmtGetSuperTableVgroup((SSuperTableObj *) pTable); + SCMSuperTableInfoRsp *pRsp = mgmtGetSuperTableVgroup((SSuperTableObj *) pTable); if (pRsp != NULL) { int32_t msgLen = sizeof(SSuperTableObj) + htonl(pRsp->numOfDnodes) * sizeof(int32_t); + SRpcMsg rpcRsp = {0}; + rpcRsp.handle = pMsg->thandle; rpcRsp.pCont = pRsp; rpcRsp.contLen = msgLen; rpcSendResponse(&rpcRsp); } else { - rpcRsp.code = TSDB_CODE_INVALID_TABLE; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_TABLE); + } +} + +static void mgmtProcessCreateTableRsp(SRpcMsg *rpcMsg) { + if (rpcMsg->handle == NULL) return; + + SQueuedMsg *queueMsg = rpcMsg->handle; + queueMsg->received++; + + STableInfo *pTable = queueMsg->ahandle; + mTrace("table:%s, create table rsp received, thandle:%p ahandle:%p result:%s", pTable->tableId, queueMsg->thandle, + rpcMsg->handle, tstrerror(rpcMsg->code)); + + if (rpcMsg->code != TSDB_CODE_SUCCESS) { + if (pTable->type == TSDB_CHILD_TABLE) { + sdbDeleteRow(tsChildTableSdb, pTable); + } else if (pTable->type == TSDB_NORMAL_TABLE){ + sdbDeleteRow(tsNormalTableSdb, pTable); + } else {} + mError("table:%s, failed to create in dnode, reason:%s", pTable->tableId, tstrerror(rpcMsg->code)); + mgmtSendSimpleResp(queueMsg->thandle, rpcMsg->code); + } else { + mTrace("table:%s, created in dnode", pTable->tableId); + if (queueMsg->msgType != TSDB_MSG_TYPE_CM_CREATE_TABLE) { + SQueuedMsg *newMsg = calloc(1, sizeof(SQueuedMsg)); + newMsg->msgType = queueMsg->msgType; + newMsg->thandle = queueMsg->thandle; + newMsg->pDb = queueMsg->pDb; + newMsg->pUser = queueMsg->pUser; + newMsg->contLen = queueMsg->contLen; + newMsg->pCont = rpcMallocCont(newMsg->contLen); + memcpy(newMsg->pCont, queueMsg->pCont, newMsg->contLen); + mTrace("table:%s, start to get meta", pTable->tableId); + 
mgmtAddToShellQueue(newMsg); + } else { + mgmtSendSimpleResp(queueMsg->thandle, rpcMsg->code); + } } -} \ No newline at end of file + + free(queueMsg); +} + +static void mgmtProcessAlterTableRsp(SRpcMsg *rpcMsg) { + mTrace("alter table rsp received, handle:%p code:%d", rpcMsg->handle, rpcMsg->code); +} + +static void mgmtProcessDropTableRsp(SRpcMsg *rpcMsg) { + if (rpcMsg->handle == NULL) return; + + SQueuedMsg *queueMsg = rpcMsg->handle; + queueMsg->received++; + + STableInfo *pTable = queueMsg->ahandle; + mTrace("table:%s, drop table rsp received, thandle:%p result:%s", pTable->tableId, queueMsg->thandle, tstrerror(rpcMsg->code)); + + if (rpcMsg->code != TSDB_CODE_SUCCESS) { + mError("table:%s, failed to drop in dnode, reason:%s", pTable->tableId, tstrerror(rpcMsg->code)); + mgmtSendSimpleResp(queueMsg->thandle, rpcMsg->code); + free(queueMsg); + return; + } + + SVgObj *pVgroup = mgmtGetVgroup(pTable->vgId); + if (pVgroup == NULL) { + mError("table:%s, failed to get vgroup", pTable->tableId); + mgmtSendSimpleResp(queueMsg->thandle, TSDB_CODE_INVALID_VGROUP_ID); + free(queueMsg); + return; + } + + if (pTable->type == TSDB_CHILD_TABLE) { + if (sdbDeleteRow(tsChildTableSdb, pTable) < 0) { + mError("table:%s, update ctables sdb error", pTable->tableId); + mgmtSendSimpleResp(queueMsg->thandle, TSDB_CODE_SDB_ERROR); + free(queueMsg); + return; + } + } else if (pTable->type == TSDB_NORMAL_TABLE){ + if (sdbDeleteRow(tsNormalTableSdb, pTable) < 0) { + mError("table:%s, update ntables sdb error", pTable->tableId); + mgmtSendSimpleResp(queueMsg->thandle, TSDB_CODE_SDB_ERROR); + free(queueMsg); + return; + } + } + + if (pVgroup->numOfTables <= 0) { + mPrint("vgroup:%d, all tables is dropped, drop vgroup", pVgroup->vgId); + mgmtDropVgroup(pVgroup, NULL); + } + + mgmtSendSimpleResp(queueMsg->thandle, TSDB_CODE_SUCCESS); + free(queueMsg); +} + +static void mgmtProcessDropStableRsp(SRpcMsg *rpcMsg) { + mTrace("drop stable rsp received, handle:%p code:%d", rpcMsg->handle, 
rpcMsg->code); +} + +// +// +//static void mgmtProcessTableCfgMsg(int8_t msgType, int8_t *pCont, int32_t contLen, void *thandle) { +// SDMConfigTableMsg *pCfg = (SDMConfigTableMsg *) pCont; +// pCfg->dnode = htonl(pCfg->dnode); +// pCfg->vnode = htonl(pCfg->vnode); +// pCfg->sid = htonl(pCfg->sid); +// mTrace("dnode:%s, vnode:%d, sid:%d, receive table config msg", taosIpStr(pCfg->dnode), pCfg->vnode, pCfg->sid); +// +// if (!sdbMaster) { +// mError("dnode:%s, vnode:%d, sid:%d, not master, redirect it", taosIpStr(pCfg->dnode), pCfg->vnode, pCfg->sid); +// mgmtSendRspToDnode(thandle, msgType + 1, TSDB_CODE_REDIRECT, NULL, 0); +// return; +// } +// +// STableInfo *pTable = mgmtGetTableByPos(pCfg->dnode, pCfg->vnode, pCfg->sid); +// if (pTable == NULL) { +// mError("dnode:%s, vnode:%d, sid:%d, table not found", taosIpStr(pCfg->dnode), pCfg->vnode, pCfg->sid); +// mgmtSendRspToDnode(thandle, msgType + 1, TSDB_CODE_INVALID_TABLE, NULL, 0); +// return; +// } +// +// mgmtSendRspToDnode(thandle, msgType + 1, TSDB_CODE_SUCCESS, NULL, 0); +// +// //TODO +// SRpcIpSet ipSet = mgmtGetIpSetFromIp(pCfg->dnode); +// mgmtSendCreateTableMsg(NULL, &ipSet, NULL); +//} +// \ No newline at end of file diff --git a/src/mnode/src/mgmtUser.c b/src/mnode/src/mgmtUser.c index e3ea2292d81a59e1f62ccb1a27d5128132044a73..771c78c2229a2191cb64973a81553b026e38926f 100644 --- a/src/mnode/src/mgmtUser.c +++ b/src/mnode/src/mgmtUser.c @@ -16,7 +16,6 @@ #define _DEFAULT_SOURCE #include "os.h" #include "trpc.h" -#include "tschemautil.h" #include "ttime.h" #include "mgmtAcct.h" #include "mgmtGrant.h" @@ -30,12 +29,12 @@ static int32_t tsUserUpdateSize = 0; static int32_t mgmtCreateUser(SAcctObj *pAcct, char *name, char *pass); static int32_t mgmtDropUser(SAcctObj *pAcct, char *name); static int32_t mgmtUpdateUser(SUserObj *pUser); -static int32_t mgmtGetUserMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtGetUserMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static 
int32_t mgmtRetrieveUsers(SShowObj *pShow, char *data, int32_t rows, void *pConn); -static void mgmtProcessCreateUserMsg(SRpcMsg *rpcMsg); -static void mgmtProcessAlterUserMsg(SRpcMsg *rpcMsg); -static void mgmtProcessDropUserMsg(SRpcMsg *rpcMsg); +static void mgmtProcessCreateUserMsg(SQueuedMsg *pMsg); +static void mgmtProcessAlterUserMsg(SQueuedMsg *pMsg); +static void mgmtProcessDropUserMsg(SQueuedMsg *pMsg); static void *(*mgmtUserActionFp[SDB_MAX_ACTION_TYPES])(void *row, char *str, int32_t size, int32_t *ssize); static void *mgmtUserActionInsert(void *row, char *str, int32_t size, int32_t *ssize); @@ -59,7 +58,7 @@ int32_t mgmtInitUsers() { SUserObj tObj; tsUserUpdateSize = tObj.updateEnd - (int8_t *)&tObj; - tsUserSdb = sdbOpenTable(tsMaxUsers, tsUserUpdateSize, "user", SDB_KEYTYPE_STRING, tsMgmtDirectory, mgmtUserAction); + tsUserSdb = sdbOpenTable(tsMaxUsers, tsUserUpdateSize, "users", SDB_KEYTYPE_STRING, tsMnodeDir, mgmtUserAction); if (tsUserSdb == NULL) { mError("failed to init user data"); return -1; @@ -83,9 +82,9 @@ int32_t mgmtInitUsers() { mgmtCreateUser(pAcct, "monitor", tsInternalPass); mgmtCreateUser(pAcct, "_root", tsInternalPass); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CREATE_USER, mgmtProcessCreateUserMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_ALTER_USER, mgmtProcessAlterUserMsg); - mgmtAddShellMsgHandle(TSDB_MSG_TYPE_DROP_USER, mgmtProcessDropUserMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_CREATE_USER, mgmtProcessCreateUserMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_ALTER_USER, mgmtProcessAlterUserMsg); + mgmtAddShellMsgHandle(TSDB_MSG_TYPE_CM_DROP_USER, mgmtProcessDropUserMsg); mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_USER, mgmtGetUserMeta); mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_USER, mgmtRetrieveUsers); @@ -123,7 +122,7 @@ static int32_t mgmtCreateUser(SAcctObj *pAcct, char *name, char *pass) { SUserObj *pUser = (SUserObj *)sdbGetRow(tsUserSdb, name); if (pUser != NULL) { - mWarn("user:%s is already there", name); + 
mTrace("user:%s is already there", name); return TSDB_CODE_USER_ALREADY_EXIST; } @@ -171,14 +170,14 @@ static int32_t mgmtDropUser(SAcctObj *pAcct, char *name) { return 0; } -static int32_t mgmtGetUserMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +static int32_t mgmtGetUserMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { SUserObj *pUser = mgmtGetUserFromConn(pConn); if (pUser == NULL) { return TSDB_CODE_INVALID_USER; } int32_t cols = 0; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = TSDB_USER_LEN; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; @@ -194,7 +193,7 @@ static int32_t mgmtGetUserMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) pShow->bytes[cols] = 8; pSchema[cols].type = TSDB_DATA_TYPE_TIMESTAMP; - strcpy(pSchema[cols].name, "created time"); + strcpy(pSchema[cols].name, "created_time"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; @@ -330,57 +329,47 @@ static void *mgmtUserActionDestroy(void *row, char *str, int32_t size, int32_t * SUserObj *mgmtGetUserFromConn(void *pConn) { SRpcConnInfo connInfo; - rpcGetConnInfo(pConn, &connInfo); + if (rpcGetConnInfo(pConn, &connInfo) == 0) { + return mgmtGetUser(connInfo.user); + } - return mgmtGetUser(connInfo.user); + return NULL; } -static void mgmtProcessCreateUserMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; - - SUserObj *pUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); - return; - } +static void mgmtProcessCreateUserMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; + int32_t code; + SUserObj *pUser = pMsg->pUser; + if (pUser->superAuth) { - SCreateUserMsg *pCreate = rpcMsg->pCont; - rpcRsp.code = mgmtCreateUser(pUser->pAcct, pCreate->user, pCreate->pass); - if (rpcRsp.code == TSDB_CODE_SUCCESS) { 
+ SCMCreateUserMsg *pCreate = pMsg->pCont; + code = mgmtCreateUser(pUser->pAcct, pCreate->user, pCreate->pass); + if (code == TSDB_CODE_SUCCESS) { mLPrint("user:%s is created by %s", pCreate->user, pUser->user); } } else { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; + code = TSDB_CODE_NO_RIGHTS; } - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, code); } -static void mgmtProcessAlterUserMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; +static void mgmtProcessAlterUserMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; - SUserObj *pOperUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pOperUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); - return; - } - - SAlterUserMsg *pAlter = rpcMsg->pCont; + int32_t code; + SUserObj *pOperUser = pMsg->pUser; + + SCMAlterUserMsg *pAlter = pMsg->pCont; SUserObj *pUser = mgmtGetUser(pAlter->user); if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_USER); return; } if (strcmp(pUser->user, "monitor") == 0 || (strcmp(pUser->user + 1, pUser->acct) == 0 && pUser->user[0] == '_')) { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_NO_RIGHTS); return; } @@ -403,13 +392,13 @@ static void mgmtProcessAlterUserMsg(SRpcMsg *rpcMsg) { if (hasRight) { memset(pUser->pass, 0, sizeof(pUser->pass)); taosEncryptPass((uint8_t*)pAlter->pass, strlen(pAlter->pass), pUser->pass); - rpcRsp.code = mgmtUpdateUser(pUser); - mLPrint("user:%s password is altered by %s, code:%d", pAlter->user, pUser->user, rpcRsp.code); + code = mgmtUpdateUser(pUser); + mLPrint("user:%s password is altered by %s, code:%d", pAlter->user, pUser->user, code); } else { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; + code = TSDB_CODE_NO_RIGHTS; } - 
rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, code); return; } @@ -452,42 +441,34 @@ static void mgmtProcessAlterUserMsg(SRpcMsg *rpcMsg) { pUser->writeAuth = 1; } - rpcRsp.code = mgmtUpdateUser(pUser); - mLPrint("user:%s privilege is altered by %s, code:%d", pAlter->user, pUser->user, rpcRsp.code); + code = mgmtUpdateUser(pUser); + mLPrint("user:%s privilege is altered by %s, code:%d", pAlter->user, pUser->user, code); } else { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; + code = TSDB_CODE_NO_RIGHTS; } - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, code); return; } - rpcRsp.code = TSDB_CODE_NO_RIGHTS; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_NO_RIGHTS); } -static void mgmtProcessDropUserMsg(SRpcMsg *rpcMsg) { - SRpcMsg rpcRsp = {.handle = rpcMsg->handle, .pCont = NULL, .contLen = 0, .code = 0, .msgType = 0}; - if (mgmtCheckRedirect(rpcMsg->handle)) return; +static void mgmtProcessDropUserMsg(SQueuedMsg *pMsg) { + if (mgmtCheckRedirect(pMsg->thandle)) return; - SUserObj *pOperUser = mgmtGetUserFromConn(rpcMsg->handle); - if (pOperUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); - return ; - } + int32_t code; + SUserObj *pOperUser = pMsg->pUser; - SDropUserMsg *pDrop = rpcMsg->pCont; + SCMDropUserMsg *pDrop = pMsg->pCont; SUserObj *pUser = mgmtGetUser(pDrop->user); if (pUser == NULL) { - rpcRsp.code = TSDB_CODE_INVALID_USER; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_USER); return ; } if (strcmp(pUser->user, "monitor") == 0 || (strcmp(pUser->user + 1, pUser->acct) == 0 && pUser->user[0] == '_')) { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_NO_RIGHTS); return ; } @@ -509,13 +490,13 @@ static void mgmtProcessDropUserMsg(SRpcMsg *rpcMsg) { } if (hasRight) { - rpcRsp.code = mgmtDropUser(pUser->pAcct, pDrop->user); - if (rpcRsp.code == TSDB_CODE_SUCCESS) { + code = 
mgmtDropUser(pUser->pAcct, pDrop->user); + if (code == TSDB_CODE_SUCCESS) { mLPrint("user:%s is dropped by %s", pDrop->user, pUser->user); } } else { - rpcRsp.code = TSDB_CODE_NO_RIGHTS; + code = TSDB_CODE_NO_RIGHTS; } - rpcSendResponse(&rpcRsp); + mgmtSendSimpleResp(pMsg->thandle, code); } diff --git a/src/mnode/src/mgmtVgroup.c b/src/mnode/src/mgmtVgroup.c index f8331ec968bf1cf23386d3024040097838072a48..f84b5443a5bd289c78bf66f3672fc4626fe722d9 100644 --- a/src/mnode/src/mgmtVgroup.c +++ b/src/mnode/src/mgmtVgroup.c @@ -17,13 +17,13 @@ #include "os.h" #include "taoserror.h" #include "tlog.h" -#include "tschemautil.h" #include "tstatus.h" #include "mnode.h" #include "mgmtBalance.h" #include "mgmtDb.h" #include "mgmtDClient.h" #include "mgmtDnode.h" +#include "mgmtProfile.h" #include "mgmtShell.h" #include "mgmtTable.h" #include "mgmtVgroup.h" @@ -40,8 +40,13 @@ static void *mgmtVgroupActionDecode(void *row, char *str, int32_t size, int32_t static void *mgmtVgroupActionReset(void *row, char *str, int32_t size, int32_t *ssize); static void *mgmtVgroupActionDestroy(void *row, char *str, int32_t size, int32_t *ssize); -static int32_t mgmtGetVgroupMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn); +static int32_t mgmtGetVgroupMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn); static int32_t mgmtRetrieveVgroups(SShowObj *pShow, char *data, int32_t rows, void *pConn); +static void mgmtProcessCreateVnodeRsp(SRpcMsg *rpcMsg); +static void mgmtProcessDropVnodeRsp(SRpcMsg *rpcMsg); + +static void mgmtSendDropVgroupMsg(SVgObj *pVgroup, void *ahandle); +static void mgmtSendCreateVgroupMsg(SVgObj *pVgroup, void *ahandle); static void mgmtVgroupActionInit() { SVgObj tObj; @@ -69,7 +74,7 @@ int32_t mgmtInitVgroups() { mgmtVgroupActionInit(); - tsVgroupSdb = sdbOpenTable(tsMaxVGroups, tsVgUpdateSize, "vgroups", SDB_KEYTYPE_AUTO, tsMgmtDirectory, mgmtVgroupAction); + tsVgroupSdb = sdbOpenTable(tsMaxVGroups, tsVgUpdateSize, "vgroups", SDB_KEYTYPE_AUTO, tsMnodeDir, 
mgmtVgroupAction); if (tsVgroupSdb == NULL) { mError("failed to init vgroups data"); return -1; @@ -105,15 +110,17 @@ int32_t mgmtInitVgroups() { if (tsIsCluster && pVgroup->vnodeGid[0].publicIp == 0) { pVgroup->vnodeGid[0].publicIp = inet_addr(tsPublicIp); - pVgroup->vnodeGid[0].ip = inet_addr(tsPrivateIp); + pVgroup->vnodeGid[0].privateIp = inet_addr(tsPrivateIp); sdbUpdateRow(tsVgroupSdb, pVgroup, tsVgUpdateSize, 1); } - mgmtSetDnodeVgid(pVgroup->vnodeGid, pVgroup->numOfVnodes, pVgroup->vgId); + // mgmtSetDnodeVgid(pVgroup->vnodeGid, pVgroup->numOfVnodes, pVgroup->vgId); } mgmtAddShellShowMetaHandle(TSDB_MGMT_TABLE_VGROUP, mgmtGetVgroupMeta); mgmtAddShellShowRetrieveHandle(TSDB_MGMT_TABLE_VGROUP, mgmtRetrieveVgroups); + mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_CREATE_VNODE_RSP, mgmtProcessCreateVnodeRsp); + mgmtAddDClientRspHandle(TSDB_MSG_TYPE_MD_DROP_VNODE_RSP, mgmtProcessDropVnodeRsp); mTrace("vgroup is initialized"); return 0; @@ -123,88 +130,56 @@ SVgObj *mgmtGetVgroup(int32_t vgId) { return (SVgObj *)sdbGetRow(tsVgroupSdb, &vgId); } -int32_t mgmtAllocateSid(SDbObj *pDb, SVgObj *pVgroup) { - int32_t sid = taosAllocateId(pVgroup->idPool); - if (sid < 0) { - mWarn("table:%s, vgroup:%d run out of ID, num:%d", pDb->name, pVgroup->vgId, taosIdPoolNumOfUsed(pVgroup->idPool)); - pDb->vgStatus = TSDB_VG_STATUS_IN_PROGRESS; - mgmtCreateVgroup(pDb); - terrno = TSDB_CODE_ACTION_IN_PROGRESS; - } - - terrno = 0; - return sid; -} - -/* - * TODO: check if there is enough sids - */ SVgObj *mgmtGetAvailableVgroup(SDbObj *pDb) { return pDb->pHead; } -void mgmtProcessVgTimer(void *handle, void *tmrId) { - SDbObj *pDb = (SDbObj *)handle; - if (pDb == NULL) return; - - if (pDb->vgStatus > TSDB_VG_STATUS_IN_PROGRESS) { - mTrace("db:%s, set vgroup status from %d to ready", pDb->name, pDb->vgStatus); - pDb->vgStatus = TSDB_VG_STATUS_READY; +void mgmtCreateVgroup(SQueuedMsg *pMsg) { + SDbObj *pDb = pMsg->pDb; + if (pDb == NULL) { + mError("failed to create vgroup, db not found"); + 
mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_INVALID_DB); + return; } - pDb->vgTimer = NULL; -} - -SVgObj *mgmtCreateVgroup(SDbObj *pDb) { SVgObj *pVgroup = (SVgObj *)calloc(sizeof(SVgObj), 1); strcpy(pVgroup->dbName, pDb->name); pVgroup->numOfVnodes = pDb->cfg.replications; - pVgroup->createdTime = taosGetTimestampMs(); - - // based on load balance, create a new one if (mgmtAllocVnodes(pVgroup) != 0) { - mError("db:%s, no enough free dnode to alloc %d vnodes", pDb->name, pVgroup->numOfVnodes); + mError("db:%s, no enough dnode to alloc %d vnodes to vgroup", pDb->name, pVgroup->numOfVnodes); free(pVgroup); - pDb->vgStatus = TSDB_VG_STATUS_FULL; - taosTmrReset(mgmtProcessVgTimer, 5000, pDb, tsMgmtTmr, &pDb->vgTimer); - return NULL; + mgmtSendSimpleResp(pMsg->thandle, TSDB_CODE_NO_ENOUGH_DNODES); + return; } + pVgroup->createdTime = taosGetTimestampMs(); pVgroup->tableList = (STableInfo **) calloc(sizeof(STableInfo *), pDb->cfg.maxSessions); pVgroup->numOfTables = 0; pVgroup->idPool = taosInitIdPool(pDb->cfg.maxSessions); mgmtAddVgroupIntoDb(pDb, pVgroup); - mgmtSetDnodeVgid(pVgroup->vnodeGid, pVgroup->numOfVnodes, pVgroup->vgId); + // mgmtSetDnodeVgid(pVgroup->vnodeGid, pVgroup->numOfVnodes, pVgroup->vgId); sdbInsertRow(tsVgroupSdb, pVgroup, 0); - mTrace("vgroup:%d, vgroup is created, db:%s replica:%d", pVgroup->vgId, pDb->name, pVgroup->numOfVnodes); - for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) - mTrace("vgroup:%d, dnode:%s vnode:%d is created", pVgroup->vgId, taosIpStr(pVgroup->vnodeGid[i].ip), pVgroup->vnodeGid[i].vnode); + mPrint("vgroup:%d, is created in mnode, db:%s replica:%d", pVgroup->vgId, pDb->name, pVgroup->numOfVnodes); + for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { + mPrint("vgroup:%d, dnode:%d vnode:%d", pVgroup->vgId, pVgroup->vnodeGid[i].dnodeId, pVgroup->vnodeGid[i].vnode); + } - return pVgroup; + pMsg->ahandle = pVgroup; + pMsg->expected = pVgroup->numOfVnodes; + mgmtSendCreateVgroupMsg(pVgroup, pMsg); } -int32_t 
mgmtDropVgroup(SDbObj *pDb, SVgObj *pVgroup) { - STableInfo *pTable; - - if (pVgroup->numOfTables > 0) { - for (int32_t i = 0; i < pDb->cfg.maxSessions; ++i) { - if (pVgroup->tableList != NULL) { - pTable = pVgroup->tableList[i]; - if (pTable) mgmtDropTable(pDb, pTable->tableId, 0); - } - } +void mgmtDropVgroup(SVgObj *pVgroup, void *ahandle) { + if (ahandle != NULL) { + mgmtSendDropVgroupMsg(pVgroup, ahandle); + } else { + mTrace("vgroup:%d, replica:%d is deleting from sdb", pVgroup->vgId, pVgroup->numOfVnodes); + mgmtSendDropVgroupMsg(pVgroup, NULL); + sdbDeleteRow(tsVgroupSdb, pVgroup); } - - mTrace("vgroup:%d, db:%s replica:%d is deleted", pVgroup->vgId, pDb->name, pVgroup->numOfVnodes); - - //mgmtSendDropVgroupMsg(pVgroup, NULL); - - sdbDeleteRow(tsVgroupSdb, pVgroup); - - return TSDB_CODE_SUCCESS; } void mgmtSetVgroupIdPool() { @@ -232,14 +207,14 @@ void mgmtCleanUpVgroups() { sdbCloseTable(tsVgroupSdb); } -int32_t mgmtGetVgroupMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { +int32_t mgmtGetVgroupMeta(STableMetaMsg *pMeta, SShowObj *pShow, void *pConn) { SDbObj *pDb = mgmtGetDb(pShow->db); if (pDb == NULL) { return TSDB_CODE_DB_NOT_SELECTED; } int32_t cols = 0; - SSchema *pSchema = tsGetSchema(pMeta); + SSchema *pSchema = pMeta->schema; pShow->bytes[cols] = 4; pSchema[cols].type = TSDB_DATA_TYPE_INT; @@ -255,7 +230,7 @@ int32_t mgmtGetVgroupMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { pShow->bytes[cols] = 9; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; - strcpy(pSchema[cols].name, "vgroup status"); + strcpy(pSchema[cols].name, "vgroup_status"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; @@ -295,13 +270,13 @@ int32_t mgmtGetVgroupMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { pShow->bytes[cols] = 9; pSchema[cols].type = TSDB_DATA_TYPE_BINARY; - strcpy(pSchema[cols].name, "vnode status"); + strcpy(pSchema[cols].name, "vnode_status"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; pShow->bytes[cols] = 16; 
pSchema[cols].type = TSDB_DATA_TYPE_BINARY; - strcpy(pSchema[cols].name, "public ip"); + strcpy(pSchema[cols].name, "public_ip"); pSchema[cols].bytes = htons(pShow->bytes[cols]); cols++; } @@ -326,9 +301,9 @@ int32_t mgmtGetVgroupMeta(STableMeta *pMeta, SShowObj *pShow, void *pConn) { } char *mgmtGetVnodeStatus(SVgObj *pVgroup, SVnodeGid *pVnode) { - SDnodeObj *pDnode = mgmtGetDnode(pVnode->ip); + SDnodeObj *pDnode = mgmtGetDnode(pVnode->dnodeId); if (pDnode == NULL) { - mError("dnode:%s, vgroup:%d, vnode:%d dnode not exist", taosIpStr(pVnode->ip), pVgroup->vgId, pVnode->vnode); + mError("vgroup:%d, not exist in dnode:%d", pVgroup->vgId, pDnode->dnodeId); return "null"; } @@ -336,14 +311,13 @@ char *mgmtGetVnodeStatus(SVgObj *pVgroup, SVnodeGid *pVnode) { return "offline"; } - SVnodeLoad *vload = pDnode->vload + pVnode->vnode; - if (vload->vgId != pVgroup->vgId || vload->vnode != pVnode->vnode) { - mError("dnode:%s, vgroup:%d, vnode:%d not same with dnode vgroup:%d vnode:%d", - taosIpStr(pVnode->ip), pVgroup->vgId, pVnode->vnode, vload->vgId, vload->vnode); - return "null"; + for (int i = 0; i < pDnode->openVnodes; ++i) { + if (pDnode->vload[i].vgId == pVgroup->vgId) { + return (char*)taosGetVnodeStatusStr(pDnode->vload[i].status); + } } - - return (char*)taosGetVnodeStatusStr(vload->status); + + return "null"; } int32_t mgmtRetrieveVgroups(SShowObj *pShow, char *data, int32_t rows, void *pConn) { @@ -383,7 +357,7 @@ int32_t mgmtRetrieveVgroups(SShowObj *pShow, char *data, int32_t rows, void *pCo cols++; for (int32_t i = 0; i < maxReplica; ++i) { - tinet_ntoa(ipstr, pVgroup->vnodeGid[i].ip); + tinet_ntoa(ipstr, pVgroup->vnodeGid[i].privateIp); pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; strcpy(pWrite, ipstr); cols++; @@ -393,7 +367,7 @@ int32_t mgmtRetrieveVgroups(SShowObj *pShow, char *data, int32_t rows, void *pCo cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - if (pVgroup->vnodeGid[i].ip != 
0) { + if (pVgroup->vnodeGid[i].dnodeId != 0) { char *vnodeStatus = mgmtGetVnodeStatus(pVgroup, pVgroup->vnodeGid + i); strcpy(pWrite, vnodeStatus); } else { @@ -415,6 +389,11 @@ int32_t mgmtRetrieveVgroups(SShowObj *pShow, char *data, int32_t rows, void *pCo } static void *mgmtVgroupActionInsert(void *row, char *str, int32_t size, int32_t *ssize) { + SVgObj *pVgroup = row; + for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { + pVgroup->vnodeGid[i].vnode = pVgroup->vgId; + } + return NULL; } @@ -426,7 +405,7 @@ static void *mgmtVgroupActionDelete(void *row, char *str, int32_t size, int32_t mgmtRemoveVgroupFromDb(pDb, pVgroup); } - mgmtUnSetDnodeVgid(pVgroup->vnodeGid, pVgroup->numOfVnodes); + // mgmtUnSetDnodeVgid(pVgroup->vnodeGid, pVgroup->numOfVnodes); tfree(pVgroup->tableList); return NULL; @@ -510,17 +489,16 @@ void mgmtRemoveTableFromVgroup(SVgObj *pVgroup, STableInfo *pTable) { taosFreeId(pVgroup->idPool, pTable->sid); } -SMDCreateVnodeMsg *mgmtBuildCreateVnodeMsg(SVgObj *pVgroup, int32_t vnode) { +SMDCreateVnodeMsg *mgmtBuildCreateVnodeMsg(SVgObj *pVgroup) { SDbObj *pDb = mgmtGetDb(pVgroup->dbName); if (pDb == NULL) return NULL; - SMDCreateVnodeMsg *pVPeers = rpcMallocCont(sizeof(SMDCreateVnodeMsg)); - if (pVPeers == NULL) return NULL; + SMDCreateVnodeMsg *pVnode = rpcMallocCont(sizeof(SMDCreateVnodeMsg)); + if (pVnode == NULL) return NULL; - pVPeers->vnode = htonl(vnode); - pVPeers->cfg = pDb->cfg; + pVnode->cfg = pDb->cfg; - SVnodeCfg *pCfg = &pVPeers->cfg; + SVnodeCfg *pCfg = &pVnode->cfg; pCfg->vgId = htonl(pVgroup->vgId); pCfg->maxSessions = htonl(pCfg->maxSessions); pCfg->cacheBlockSize = htonl(pCfg->cacheBlockSize); @@ -534,13 +512,12 @@ SMDCreateVnodeMsg *mgmtBuildCreateVnodeMsg(SVgObj *pVgroup, int32_t vnode) { pCfg->replications = (char) pVgroup->numOfVnodes; pCfg->rowsInFileBlock = htonl(pCfg->rowsInFileBlock); - SVPeerDesc *vpeerDesc = pVPeers->vpeerDesc; + SVnodeDesc *vpeerDesc = pVnode->vpeerDesc; for (int32_t j = 0; j < 
pVgroup->numOfVnodes; ++j) { - vpeerDesc[j].ip = htonl(pVgroup->vnodeGid[j].ip); - vpeerDesc[j].vnode = htonl(pVgroup->vnodeGid[j].vnode); + vpeerDesc[j].ip = htonl(pVgroup->vnodeGid[j].privateIp); } - return pVPeers; + return pVnode; } SVgObj *mgmtGetVgroupByVnode(uint32_t dnode, int32_t vnode) { @@ -558,35 +535,186 @@ SVgObj *mgmtGetVgroupByVnode(uint32_t dnode, int32_t vnode) { } SRpcIpSet mgmtGetIpSetFromVgroup(SVgObj *pVgroup) { - SRpcIpSet ipSet = {.numOfIps = pVgroup->numOfVnodes, .inUse = 0, .port = tsMgmtDnodePort + 1}; + SRpcIpSet ipSet = { + .numOfIps = pVgroup->numOfVnodes, + .inUse = 0, + .port = tsDnodeMnodePort + }; for (int i = 0; i < pVgroup->numOfVnodes; ++i) { - ipSet.ip[i] = pVgroup->vnodeGid[i].ip; + ipSet.ip[i] = pVgroup->vnodeGid[i].privateIp; } return ipSet; } SRpcIpSet mgmtGetIpSetFromIp(uint32_t ip) { - SRpcIpSet ipSet = {.ip[0] = ip, .numOfIps = 1, .inUse = 0, .port = tsMgmtDnodePort + 1}; + SRpcIpSet ipSet = { + .ip[0] = ip, + .numOfIps = 1, + .inUse = 0, + .port = tsDnodeMnodePort + }; return ipSet; } -void mgmtSendCreateVnodeMsg(SVgObj *pVgroup, int32_t vnode, SRpcIpSet *ipSet, void *ahandle) { - mTrace("vgroup:%d, send create vnode:%d msg, ahandle:%p", pVgroup->vgId, vnode, ahandle); - SMDCreateVnodeMsg *pCreate = mgmtBuildCreateVnodeMsg(pVgroup, vnode); +void mgmtSendCreateVnodeMsg(SVgObj *pVgroup, SRpcIpSet *ipSet, void *ahandle) { + mTrace("vgroup:%d, send create vnode:%d msg, ahandle:%p", pVgroup->vgId, pVgroup->vgId, ahandle); + SMDCreateVnodeMsg *pCreate = mgmtBuildCreateVnodeMsg(pVgroup); + SRpcMsg rpcMsg = { + .handle = ahandle, + .pCont = pCreate, + .contLen = pCreate ? 
sizeof(SMDCreateVnodeMsg) : 0, + .code = 0, + .msgType = TSDB_MSG_TYPE_MD_CREATE_VNODE + }; + mgmtSendMsgToDnode(ipSet, &rpcMsg); +} + +void mgmtSendCreateVgroupMsg(SVgObj *pVgroup, void *ahandle) { + mTrace("vgroup:%d, send create all vnodes msg, ahandle:%p", pVgroup->vgId, ahandle); + for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { + SRpcIpSet ipSet = mgmtGetIpSetFromIp(pVgroup->vnodeGid[i].privateIp); + mgmtSendCreateVnodeMsg(pVgroup, &ipSet, ahandle); + } +} + +static void mgmtProcessCreateVnodeRsp(SRpcMsg *rpcMsg) { + if (rpcMsg->handle == NULL) return; + + SQueuedMsg *queueMsg = rpcMsg->handle; + queueMsg->received++; + if (rpcMsg->code == TSDB_CODE_SUCCESS) { + queueMsg->code = rpcMsg->code; + queueMsg->successed++; + } + + SVgObj *pVgroup = queueMsg->ahandle; + mTrace("vgroup:%d, create vnode rsp received, result:%s received:%d successed:%d expected:%d, thandle:%p ahandle:%p", + pVgroup->vgId, tstrerror(rpcMsg->code), queueMsg->received, queueMsg->successed, queueMsg->expected, + queueMsg->thandle, rpcMsg->handle); + + if (queueMsg->received != queueMsg->expected) return; + + if (queueMsg->received == queueMsg->successed) { + SQueuedMsg *newMsg = calloc(1, sizeof(SQueuedMsg)); + newMsg->msgType = queueMsg->msgType; + newMsg->thandle = queueMsg->thandle; + newMsg->pDb = queueMsg->pDb; + newMsg->pUser = queueMsg->pUser; + newMsg->contLen = queueMsg->contLen; + newMsg->pCont = rpcMallocCont(newMsg->contLen); + memcpy(newMsg->pCont, queueMsg->pCont, newMsg->contLen); + mgmtAddToShellQueue(newMsg); + } else { + sdbDeleteRow(tsVgroupSdb, pVgroup); + mgmtSendSimpleResp(queueMsg->thandle, rpcMsg->code); + } + + free(queueMsg); +} + +static SMDDropVnodeMsg *mgmtBuildDropVnodeMsg(int32_t vgId) { + SMDDropVnodeMsg *pDrop = rpcMallocCont(sizeof(SMDDropVnodeMsg)); + if (pDrop == NULL) return NULL; + + pDrop->vgId = htonl(vgId); + return pDrop; +} + +void mgmtSendDropVnodeMsg(int32_t vgId, SRpcIpSet *ipSet, void *ahandle) { + mTrace("vgroup:%d, send drop vnode msg, 
ahandle:%p", vgId, ahandle); + SMDDropVnodeMsg *pDrop = mgmtBuildDropVnodeMsg(vgId); SRpcMsg rpcMsg = { .handle = ahandle, - .pCont = pCreate, - .contLen = pCreate ? sizeof(SMDCreateVnodeMsg) : 0, + .pCont = pDrop, + .contLen = pDrop ? sizeof(SMDDropVnodeMsg) : 0, .code = 0, - .msgType = TSDB_MSG_TYPE_MD_CREATE_VNODE + .msgType = TSDB_MSG_TYPE_MD_DROP_VNODE }; mgmtSendMsgToDnode(ipSet, &rpcMsg); } -void mgmtSendCreateVgroupMsg(SVgObj *pVgroup, void *ahandle) { - mTrace("vgroup:%d, send create all vnodes msg, handle:%p", pVgroup->vgId, ahandle); +static void mgmtSendDropVgroupMsg(SVgObj *pVgroup, void *ahandle) { + mTrace("vgroup:%d, send drop all vnodes msg, ahandle:%p", pVgroup->vgId, ahandle); for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { - SRpcIpSet ipSet = mgmtGetIpSetFromIp(pVgroup->vnodeGid[i].ip); - mgmtSendCreateVnodeMsg(pVgroup, pVgroup->vnodeGid[i].vnode, &ipSet, ahandle); + SRpcIpSet ipSet = mgmtGetIpSetFromIp(pVgroup->vnodeGid[i].privateIp); + mgmtSendDropVnodeMsg(pVgroup->vgId, &ipSet, ahandle); } -} \ No newline at end of file +} + +static void mgmtProcessDropVnodeRsp(SRpcMsg *rpcMsg) { + mTrace("drop vnode msg is received"); + if (rpcMsg->handle == NULL) return; + + SQueuedMsg *queueMsg = rpcMsg->handle; + queueMsg->received++; + if (rpcMsg->code == TSDB_CODE_SUCCESS) { + queueMsg->code = rpcMsg->code; + queueMsg->successed++; + } + + SVgObj *pVgroup = queueMsg->ahandle; + mTrace("vgroup:%d, drop vnode rsp received, result:%s received:%d successed:%d expected:%d, thandle:%p ahandle:%p", + pVgroup->vgId, tstrerror(rpcMsg->code), queueMsg->received, queueMsg->successed, queueMsg->expected, + queueMsg->thandle, rpcMsg->handle); + + if (queueMsg->received != queueMsg->expected) return; + + sdbDeleteRow(tsVgroupSdb, pVgroup); + + SQueuedMsg *newMsg = calloc(1, sizeof(SQueuedMsg)); + newMsg->msgType = queueMsg->msgType; + newMsg->thandle = queueMsg->thandle; + newMsg->pDb = queueMsg->pDb; + newMsg->pUser = queueMsg->pUser; + newMsg->contLen = 
queueMsg->contLen; + newMsg->pCont = rpcMallocCont(newMsg->contLen); + memcpy(newMsg->pCont, queueMsg->pCont, newMsg->contLen); + mgmtAddToShellQueue(newMsg); + + free(queueMsg); +} + +void mgmtUpdateVgroupIp(SDnodeObj *pDnode) { + void * pNode = NULL; + SVgObj *pVgroup = NULL; + while (1) { + pNode = sdbFetchRow(tsVgroupSdb, pNode, (void **)&pVgroup); + if (pVgroup == NULL) break; + + for (int32_t i = 0; i < pVgroup->numOfVnodes; ++i) { + SVnodeGid *vnodeGid = pVgroup->vnodeGid + i; + if (vnodeGid->dnodeId == pDnode->dnodeId) { + mPrint("vgroup:%d, dnode:%d, privateIp:%s change to %s, publicIp:%s change to %s", + pVgroup->vgId, vnodeGid->dnodeId, pDnode->privateIp, taosIpStr(vnodeGid->privateIp), + pDnode->publicIp, taosIpStr(vnodeGid->publicIp)); + vnodeGid->publicIp = pDnode->publicIp; + vnodeGid->privateIp = pDnode->privateIp; + sdbUpdateRow(tsVgroupSdb, pVgroup, tsVgUpdateSize, 1); + } + } + } +} + +//static void mgmtProcessVnodeCfgMsg(int8_t msgType, int8_t *pCont, int32_t contLen, void *pConn) { +// if (!sdbMaster) { +// mgmtSendRspToDnode(pConn, msgType + 1, TSDB_CODE_REDIRECT, NULL, 0); +// return; +// } +// +// SDMConfigVnodeMsg *pCfg = (SDMConfigVnodeMsg *) pCont; +// pCfg->dnode = htonl(pCfg->dnode); +// pCfg->vnode = htonl(pCfg->vnode); +// +// SVgObj *pVgroup = mgmtGetVgroupByVnode(pCfg->dnode, pCfg->vnode); +// if (pVgroup == NULL) { +// mTrace("dnode:%s, vnode:%d, no vgroup info", taosIpStr(pCfg->dnode), pCfg->vnode); +// mgmtSendRspToDnode(pConn, msgType + 1, TSDB_CODE_NOT_ACTIVE_VNODE, NULL, 0); +// return; +// } +// +// mgmtSendRspToDnode(pConn, msgType + 1, TSDB_CODE_SUCCESS, NULL, 0); +// +// SRpcIpSet ipSet = mgmtGetIpSetFromIp(pCfg->dnode); +// mgmtSendCreateVnodeMsg(pVgroup, pCfg->vnode, &ipSet, NULL); +//} +// \ No newline at end of file diff --git a/src/os/darwin/src/tdarwin.c b/src/os/darwin/src/tdarwin.c index ff9576542f2bda45db0d016d27a6957e0fd19c9a..7896592030f8e18e5d4be5efad0bf7c78100dfb8 100644 --- a/src/os/darwin/src/tdarwin.c +++ 
b/src/os/darwin/src/tdarwin.c @@ -34,7 +34,7 @@ #include "tutil.h" char configDir[TSDB_FILENAME_LEN] = "/etc/taos"; -char tsDirectory[TSDB_FILENAME_LEN] = "/var/lib/taos"; +char tsVnodeDir[TSDB_FILENAME_LEN] = "/var/lib/taos"; char dataDir[TSDB_FILENAME_LEN] = "/var/lib/taos"; char logDir[TSDB_FILENAME_LEN] = "~/TDengineLog"; char scriptDir[TSDB_FILENAME_LEN] = "/etc/taos"; diff --git a/src/os/linux/src/tlinux.c b/src/os/linux/src/tlinux.c index 98faffdfd27fb0dbdbd641141a2f8ec99ead3f49..bce4a8f13db7727cda101943529ebbd23481a32a 100644 --- a/src/os/linux/src/tlinux.c +++ b/src/os/linux/src/tlinux.c @@ -35,7 +35,9 @@ #include "ttimer.h" char configDir[TSDB_FILENAME_LEN] = "/etc/taos"; -char tsDirectory[TSDB_FILENAME_LEN] = "/var/lib/taos"; +char tsVnodeDir[TSDB_FILENAME_LEN] = {0}; +char tsDnodeDir[TSDB_FILENAME_LEN] = {0}; +char tsMnodeDir[TSDB_FILENAME_LEN] = {0}; char dataDir[TSDB_FILENAME_LEN] = "/var/lib/taos"; char logDir[TSDB_FILENAME_LEN] = "/var/log/taos"; char scriptDir[TSDB_FILENAME_LEN] = "/etc/taos"; diff --git a/src/os/windows/src/twindows.c b/src/os/windows/src/twindows.c index 15e42d8948faf30c7f4462479fc5e2eed549ff50..83c6017b398c9321aa2fd8bf39e05fcc6871dcd3 100644 --- a/src/os/windows/src/twindows.c +++ b/src/os/windows/src/twindows.c @@ -33,7 +33,7 @@ #include char configDir[TSDB_FILENAME_LEN] = "C:/TDengine/cfg"; -char tsDirectory[TSDB_FILENAME_LEN] = "C:/TDengine/data"; +char tsVnodeDir[TSDB_FILENAME_LEN] = "C:/TDengine/data"; char logDir[TSDB_FILENAME_LEN] = "C:/TDengine/log"; char dataDir[TSDB_FILENAME_LEN] = "C:/TDengine/data"; char scriptDir[TSDB_FILENAME_LEN] = "C:/TDengine/script"; diff --git a/src/query/CMakeLists.txt b/src/query/CMakeLists.txt index c0ded1dafb70cf36ea05886cbe17904b18addc24..0e51962f49999ae55c1c18aa5acd9d9baba64be7 100644 --- a/src/query/CMakeLists.txt +++ b/src/query/CMakeLists.txt @@ -4,10 +4,12 @@ PROJECT(TDengine) INCLUDE_DIRECTORIES(${TD_OS_DIR}/inc) INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/inc) 
INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/util/inc) +INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/common/inc) +INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/src/vnode/tsdb/inc) INCLUDE_DIRECTORIES(inc) IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM)) AUX_SOURCE_DIRECTORY(src SRC) ADD_LIBRARY(query ${SRC}) - TARGET_LINK_LIBRARIES(query util m rt) + TARGET_LINK_LIBRARIES(query tsdb tutil m rt) ENDIF () \ No newline at end of file diff --git a/src/query/inc/tast.h b/src/query/inc/qast.h similarity index 100% rename from src/query/inc/tast.h rename to src/query/inc/qast.h diff --git a/src/query/inc/textbuffer.h b/src/query/inc/qextbuffer.h similarity index 97% rename from src/query/inc/textbuffer.h rename to src/query/inc/qextbuffer.h index 1f2955ba0d231c1aa5c2abefb72f4b2bc4c10c8b..598b809d92156c6fea1bcacd5dabafb89d4ee490 100644 --- a/src/query/inc/textbuffer.h +++ b/src/query/inc/qextbuffer.h @@ -124,20 +124,12 @@ typedef struct tTagSchema { typedef struct tSidSet { int32_t numOfSids; int32_t numOfSubSet; - STableSidExtInfo **pSids; + STableIdInfo **pTableIdList; int32_t * starterPos; // position of each subgroup, generated according to - SColumnModel *pColumnModel; SColumnOrderInfo orderIdx; } tSidSet; -/** - * - * @param fileNamePattern - * @param dstPath - */ -void getTmpfilePath(const char *fileNamePattern, char *dstPath); - /** * * @param inMemSize diff --git a/src/query/inc/thistogram.h b/src/query/inc/qhistogram.h similarity index 100% rename from src/query/inc/thistogram.h rename to src/query/inc/qhistogram.h diff --git a/src/query/inc/tinterpolation.h b/src/query/inc/qinterpolation.h similarity index 98% rename from src/query/inc/tinterpolation.h rename to src/query/inc/qinterpolation.h index f4b327bcbec82b2b9ca8e2f5c92b044700240dbc..c8ebd850b61cdc69e5816837d5a439a10ba99e43 100644 --- a/src/query/inc/tinterpolation.h +++ b/src/query/inc/qinterpolation.h @@ -20,6 +20,10 @@ extern "C" { #endif +#include "os.h" +#include "taosdef.h" +#include "qextbuffer.h" + typedef 
struct SInterpolationInfo { int64_t startTimestamp; int32_t order; // order [asc/desc] diff --git a/src/inc/tpercentile.h b/src/query/inc/qpercentile.h similarity index 94% rename from src/inc/tpercentile.h rename to src/query/inc/qpercentile.h index b9cf50e0bbf24357b729f8bc39996f589d6c18fc..f8f09357c635afd096cab622daab186eb2fbc512 100644 --- a/src/inc/tpercentile.h +++ b/src/query/inc/qpercentile.h @@ -13,10 +13,10 @@ * along with this program. If not, see . */ -#ifndef TDENGINE_TPERCENTILE_H -#define TDENGINE_TPERCENTILE_H +#ifndef TDENGINE_QPERCENTILE_H +#define TDENGINE_QPERCENTILE_H -#include "textbuffer.h" +#include "qextbuffer.h" typedef struct MinMaxEntry { union { @@ -74,4 +74,4 @@ void tBucketIntHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t * void tBucketDoubleHash(tMemBucket *pBucket, void *value, int16_t *segIdx, int16_t *slotIdx); -#endif // TDENGINE_TPERCENTILE_H +#endif // TDENGINE_QPERCENTILE_H diff --git a/src/query/inc/tresultBuf.h b/src/query/inc/qresultBuf.h similarity index 74% rename from src/query/inc/tresultBuf.h rename to src/query/inc/qresultBuf.h index 8f30ff7c61555e993838989aecb32a81d1c414bf..39600f512963745a90bd082bdfc3c412d38374c7 100644 --- a/src/query/inc/tresultBuf.h +++ b/src/query/inc/qresultBuf.h @@ -21,7 +21,7 @@ extern "C" { #endif #include "os.h" -#include "textbuffer.h" +#include "qextbuffer.h" typedef struct SIDList { uint32_t alloc; @@ -29,7 +29,7 @@ typedef struct SIDList { int32_t* pData; } SIDList; -typedef struct SQueryDiskbasedResultBuf { +typedef struct SDiskbasedResultBuf { int32_t numOfRowsPerPage; int32_t numOfPages; int64_t totalBufSize; @@ -42,7 +42,7 @@ typedef struct SQueryDiskbasedResultBuf { uint32_t numOfAllocGroupIds; // number of allocated id list void* idsTable; // id hash table SIDList* list; // for each id, there is a page id list -} SQueryDiskbasedResultBuf; +} SDiskbasedResultBuf; /** * create disk-based result buffer @@ -51,7 +51,7 @@ typedef struct SQueryDiskbasedResultBuf { * 
@param rowSize * @return */ -int32_t createDiskbasedResultBuffer(SQueryDiskbasedResultBuf** pResultBuf, int32_t size, int32_t rowSize); +int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t size, int32_t rowSize); /** * @@ -60,14 +60,14 @@ int32_t createDiskbasedResultBuffer(SQueryDiskbasedResultBuf** pResultBuf, int32 * @param pageId * @return */ -tFilePage* getNewDataBuf(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId); +tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId); /** * * @param pResultBuf * @return */ -int32_t getNumOfRowsPerPage(SQueryDiskbasedResultBuf* pResultBuf); +int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf); /** * @@ -75,7 +75,7 @@ int32_t getNumOfRowsPerPage(SQueryDiskbasedResultBuf* pResultBuf); * @param groupId * @return */ -SIDList getDataBufPagesIdList(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId); +SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); /** * get the specified buffer page by id @@ -83,27 +83,27 @@ SIDList getDataBufPagesIdList(SQueryDiskbasedResultBuf* pResultBuf, int32_t grou * @param id * @return */ -tFilePage* getResultBufferPageById(SQueryDiskbasedResultBuf* pResultBuf, int32_t id); +tFilePage* getResultBufferPageById(SDiskbasedResultBuf* pResultBuf, int32_t id); /** * get the total buffer size in the format of disk file * @param pResultBuf * @return */ -int32_t getResBufSize(SQueryDiskbasedResultBuf* pResultBuf); +int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf); /** * get the number of groups in the result buffer * @param pResultBuf * @return */ -int32_t getNumOfResultBufGroupId(SQueryDiskbasedResultBuf* pResultBuf); +int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf); /** * destroy result buffer * @param pResultBuf */ -void destroyResultBuf(SQueryDiskbasedResultBuf* pResultBuf); +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf); /** * diff --git 
a/src/query/inc/qsqlparser.h b/src/query/inc/qsqlparser.h index 7a1322be10fb24581b9f0d38db173a7d7f86612e..064ded2fad49d3001596d6e2e7ac762ce59829b9 100644 --- a/src/query/inc/qsqlparser.h +++ b/src/query/inc/qsqlparser.h @@ -329,6 +329,18 @@ void tSQLSetColumnType(TAOS_FIELD *pField, SSQLToken *pToken); void *ParseAlloc(void *(*mallocProc)(size_t)); +enum { + TSQL_NODE_TYPE_EXPR = 0x1, + TSQL_NODE_TYPE_ID = 0x2, + TSQL_NODE_TYPE_VALUE = 0x4, +}; + +#define NON_ARITHMEIC_EXPR 0 +#define NORMAL_ARITHMETIC 1 +#define AGG_ARIGHTMEIC 2 + +int32_t tSQLParse(SSqlInfo *pSQLInfo, const char *pSql); + #ifdef __cplusplus } #endif diff --git a/src/query/inc/tscSyntaxtreefunction.h b/src/query/inc/qsyntaxtreefunction.h similarity index 100% rename from src/query/inc/tscSyntaxtreefunction.h rename to src/query/inc/qsyntaxtreefunction.h diff --git a/src/client/inc/tscJoinProcess.h b/src/query/inc/qtsbuf.h similarity index 61% rename from src/client/inc/tscJoinProcess.h rename to src/query/inc/qtsbuf.h index 34764e4db62469af14592a026015c88b53a03fa5..1afdb0cd6cb9175b14675d06446afbe3a891df27 100644 --- a/src/client/inc/tscJoinProcess.h +++ b/src/query/inc/qtsbuf.h @@ -13,30 +13,19 @@ * along with this program. If not, see . 
*/ -#ifndef TDENGINE_TSCJOINPROCESS_H -#define TDENGINE_TSCJOINPROCESS_H +#ifndef TDENGINE_STSBUF_H +#define TDENGINE_STSBUF_H #ifdef __cplusplus extern "C" { #endif -#include "tscUtil.h" -#include "tsclient.h" +#include "os.h" +#include "taosdef.h" -void tscFetchDatablockFromSubquery(SSqlObj* pSql); -void tscGetQualifiedTSList(SSqlObj* pSql, SJoinSubquerySupporter* p1, SJoinSubquerySupporter* p2, int32_t* num); - -void tscSetupOutputColumnIndex(SSqlObj* pSql); -int32_t tscLaunchSecondPhaseSubqueries(SSqlObj* pSql); -void tscJoinQueryCallback(void* param, TAOS_RES* tres, int code); - -SJoinSubquerySupporter* tscCreateJoinSupporter(SSqlObj* pSql, SSubqueryState* pState, int32_t index); -void tscDestroyJoinSupporter(SJoinSubquerySupporter* pSupporter); - -#define MEM_BUF_SIZE (1<<20) -#define TS_COMP_BLOCK_PADDING 0xFFFFFFFF -#define TS_COMP_FILE_MAGIC 0x87F5EC4C -#define TS_COMP_FILE_VNODE_MAX 512 +#define MEM_BUF_SIZE (1 << 20) +#define TS_COMP_FILE_MAGIC 0x87F5EC4C +#define TS_COMP_FILE_VNODE_MAX 512 typedef struct STSList { char* rawBuf; @@ -73,16 +62,15 @@ typedef struct STSBlock { char* payload; // actual data that is compressed } STSBlock; +/* + * The size of buffer file should not be greater than 2G, + * and the offset of int32_t type is enough + */ typedef struct STSVnodeBlockInfo { - int32_t vnode; - - /* - * The size of buffer file is not expected to be greater than 2G, - * and the offset of int32_t type is enough - */ - int32_t offset; - int32_t numOfBlocks; - int32_t compLen; + int32_t vnode; // vnode id + int32_t offset; // offset set value in file + int32_t numOfBlocks; // number of total blocks + int32_t compLen; // compressed size } STSVnodeBlockInfo; typedef struct STSVnodeBlockInfoEx { @@ -99,22 +87,20 @@ typedef struct STSBuf { int32_t numOfAlloc; int32_t numOfVnodes; - char* assistBuf; - int32_t bufSize; - STSBlock block; - STSList tsData; // uncompressed raw ts data - - uint64_t numOfTotal; - bool autoDelete; - int32_t tsOrder; // order of 
timestamp in ts comp buffer - + char* assistBuf; + int32_t bufSize; + STSBlock block; + STSList tsData; // uncompressed raw ts data + uint64_t numOfTotal; + bool autoDelete; + int32_t tsOrder; // order of timestamp in ts comp buffer STSCursor cur; } STSBuf; typedef struct STSBufFileHeader { - uint32_t magic; // file magic number - uint32_t numOfVnode; // number of vnode stored in current file - uint32_t tsOrder; // timestamp order in current file + uint32_t magic; // file magic number + uint32_t numOfVnode; // number of vnode stored in current file + uint32_t tsOrder; // timestamp order in current file } STSBufFileHeader; STSBuf* tsBufCreate(bool autoDelete); @@ -123,24 +109,25 @@ STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_ void* tsBufDestory(STSBuf* pTSBuf); -void tsBufAppend(STSBuf* pTSBuf, int32_t vnodeId, int64_t tag, const char* pData, int32_t len); +void tsBufAppend(STSBuf* pTSBuf, int32_t vnodeId, int64_t tag, const char* pData, int32_t len); int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf, int32_t vnodeIdx); +STSBuf* tsBufClone(STSBuf* pTSBuf); + STSVnodeBlockInfo* tsBufGetVnodeBlockInfo(STSBuf* pTSBuf, int32_t vnodeId); void tsBufFlush(STSBuf* pTSBuf); -void tsBufResetPos(STSBuf* pTSBuf); +void tsBufResetPos(STSBuf* pTSBuf); STSElem tsBufGetElem(STSBuf* pTSBuf); -bool tsBufNextPos(STSBuf* pTSBuf); +bool tsBufNextPos(STSBuf* pTSBuf); STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t vnodeId, int64_t tag); STSCursor tsBufGetCursor(STSBuf* pTSBuf); -void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order); +void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order); void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur); -STSBuf* tsBufClone(STSBuf* pTSBuf); /** * display all data in comp block file, for debug purpose only @@ -152,4 +139,4 @@ void tsBufDisplay(STSBuf* pTSBuf); } #endif -#endif // TDENGINE_TSCJOINPROCESS_H +#endif // TDENGINE_STSBUF_H diff --git a/src/query/inc/queryExecutor.h 
b/src/query/inc/queryExecutor.h new file mode 100644 index 0000000000000000000000000000000000000000..8956fb52b13e557ec5f1d36a7210cacff72a35e8 --- /dev/null +++ b/src/query/inc/queryExecutor.h @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +#ifndef TDENGINE_QUERYEXECUTOR_H +#define TDENGINE_QUERYEXECUTOR_H + +#include "os.h" + +#include "hash.h" +#include "qinterpolation.h" +#include "qresultBuf.h" +#include "qsqlparser.h" +#include "qtsbuf.h" +#include "taosdef.h" +#include "tref.h" +#include "tsqlfunction.h" +#include "tarray.h" + +typedef struct SData { + int32_t num; + char data[]; +} SData; + +enum { + ST_QUERY_KILLED = 0, // query killed + ST_QUERY_PAUSED = 1, // query paused, due to full of the response buffer + ST_QUERY_COMPLETED = 2, // query completed +}; + +struct SColumnFilterElem; +typedef bool (*__filter_func_t)(struct SColumnFilterElem* pFilter, char* val1, char* val2); +typedef int32_t (*__block_search_fn_t)(char* data, int32_t num, int64_t key, int32_t order); + +typedef struct SSqlGroupbyExpr { + int16_t tableIndex; + int16_t numOfGroupCols; + SColIndexEx columnInfo[TSDB_MAX_TAGS]; // group by columns information + int16_t orderIndex; // order by column index + int16_t orderType; // order by type: asc/desc +} SSqlGroupbyExpr; + +typedef struct SPosInfo { + int16_t pageId; + int16_t rowId; +} SPosInfo; + +typedef struct SWindowStatus { + bool closed; +} SWindowStatus; + +typedef struct 
SWindowResult { + uint16_t numOfRows; + SPosInfo pos; // Position of current result in disk-based output buffer + SResultInfo* resultInfo; // For each result column, there is a resultInfo + STimeWindow window; // The time window that current result covers. + SWindowStatus status; +} SWindowResult; + +typedef struct SResultRec { + int64_t pointsTotal; + int64_t pointsRead; +} SResultRec; + +typedef struct SWindowResInfo { + SWindowResult* pResult; // result list + void* hashList; // hash list for quick access + int16_t type; // data type for hash key + int32_t capacity; // max capacity + int32_t curIndex; // current start active index + int32_t size; // number of result set + int64_t startTime; // start time of the first time window for sliding query + int64_t prevSKey; // previous (not completed) sliding window start key + int64_t threshold; // threshold to pausing query and return closed results. +} SWindowResInfo; + +typedef struct SColumnFilterElem { + int16_t bytes; // column length + __filter_func_t fp; + SColumnFilterInfo filterInfo; +} SColumnFilterElem; + +typedef struct SSingleColumnFilterInfo { + SColumnInfoEx info; + int32_t numOfFilters; + SColumnFilterElem* pFilters; + void* pData; +} SSingleColumnFilterInfo; + +/* intermediate pos during multimeter query involves interval */ +typedef struct STableQueryInfo { + int64_t lastKey; + STimeWindow win; + int32_t numOfRes; + int16_t queryRangeSet; // denote if the query range is set, only available for interval query + int64_t tag; + STSCursor cur; + int32_t sid; // for retrieve the page id list + + SWindowResInfo windowResInfo; +} STableQueryInfo; + +typedef struct STableDataInfo { + int32_t numOfBlocks; + int32_t start; // start block index + int32_t tableIndex; + void* pMeterObj; + int32_t groupIdx; // group id in table list + STableQueryInfo* pTableQInfo; +} STableDataInfo; + +typedef struct SQuery { + int16_t numOfCols; + SOrderVal order; + STimeWindow window; + int64_t intervalTime; + int64_t 
slidingTime; // sliding time for sliding window query + char slidingTimeUnit; // interval data type, used for daytime revise + int8_t precision; + int16_t numOfOutputCols; + int16_t interpoType; + int16_t checkBufferInLoop; // check if the buffer is full during scan each block + SLimitVal limit; + int32_t rowSize; + SSqlGroupbyExpr* pGroupbyExpr; + SSqlFunctionExpr* pSelectExpr; + SColumnInfoEx* colList; + int32_t numOfFilterCols; + int64_t* defaultVal; + TSKEY lastKey; + uint32_t status; // query status + SResultRec rec; + int32_t pos; + int64_t pointsOffset; // the number of points offset to save read data + SData** sdata; + int32_t capacity; + SSingleColumnFilterInfo* pFilterInfo; +} SQuery; + +typedef struct SQueryCostSummary { +} SQueryCostSummary; + +typedef struct SQueryRuntimeEnv { + SResultInfo* resultInfo; // todo refactor to merge with SWindowResInfo + SQuery* pQuery; + SData** pInterpoBuf; + SQLFunctionCtx* pCtx; + int16_t numOfRowsPerPage; + int16_t offset[TSDB_MAX_COLUMNS]; + uint16_t scanFlag; // denotes reversed scan of data or not + SInterpolationInfo interpoInfo; + SWindowResInfo windowResInfo; + STSBuf* pTSBuf; + STSCursor cur; + SQueryCostSummary summary; + bool stableQuery; // super table query or not + void* pQueryHandle; + + SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file +} SQueryRuntimeEnv; + +typedef struct SQInfo { + uint64_t signature; + TSKEY startTime; + int64_t elapsedTime; + SResultRec rec; + int32_t pointsReturned; + int32_t pointsInterpo; + int32_t code; // error code to returned to client + int32_t killed; // denotes if current query is killed + sem_t dataReady; + SArray* pTableIdList; // table list + SQueryRuntimeEnv runtimeEnv; + int32_t subgroupIdx; + int32_t offset; /* offset in group result set of subgroup */ +// tSidSet* pSidSet; + + T_REF_DECLARE() + /* + * the query is executed position on which meter of the whole list. 
+ * when the index reaches the last one of the list, it means the query is completed. + * We later may refactor to remove this attribution by using another flag to denote + * whether a multimeter query is completed or not. + */ + int32_t tableIndex; + int32_t numOfGroupResultPages; + STableDataInfo* pTableDataInfo; + TSKEY* tsList; +} SQInfo; + +/** + * create the qinfo object before adding the query task to each tsdb query worker + * + * @param pReadMsg + * @param pQInfo + * @return + */ +int32_t qCreateQueryInfo(SQueryTableMsg* pQueryTableMsg, SQInfo** pQInfo); + +/** + * query on single table + * @param pReadMsg + */ +void qTableQuery(SQInfo* pQInfo); + +/** + * query on super table + * @param pReadMsg + */ +void qSuperTableQuery(void* pReadMsg); + +/** + * wait for the query completed, and retrieve final results to client + * @param pQInfo + */ +int32_t qRetrieveQueryResultInfo(SQInfo* pQInfo, int32_t *numOfRows, int32_t* rowsize); + + +//int32_t qBuildQueryResult(SQInfo* pQInfo, void* pBuf); + +#endif // TDENGINE_QUERYEXECUTOR_H diff --git a/src/query/inc/queryUtil.h b/src/query/inc/queryUtil.h new file mode 100644 index 0000000000000000000000000000000000000000..06a716b750599881824d8096b9b01c0a90760771 --- /dev/null +++ b/src/query/inc/queryUtil.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +#ifndef TDENGINE_QUERYUTIL_H +#define TDENGINE_QUERYUTIL_H + +void clearTimeWindowResBuf(SQueryRuntimeEnv* pRuntimeEnv, SWindowResult* pOneOutputRes); +void copyTimeWindowResBuf(SQueryRuntimeEnv* pRuntimeEnv, SWindowResult* dst, const SWindowResult* src); + +int32_t initWindowResInfo(SWindowResInfo* pWindowResInfo, SQueryRuntimeEnv* pRuntimeEnv, int32_t size, + int32_t threshold, int16_t type); + +void cleanupTimeWindowInfo(SWindowResInfo* pWindowResInfo, int32_t numOfCols); +void resetTimeWindowInfo(SQueryRuntimeEnv* pRuntimeEnv, SWindowResInfo* pWindowResInfo); +void clearFirstNTimeWindow(SQueryRuntimeEnv *pRuntimeEnv, int32_t num); + +void clearClosedTimeWindow(SQueryRuntimeEnv* pRuntimeEnv); +int32_t numOfClosedTimeWindow(SWindowResInfo* pWindowResInfo); +void closeTimeWindow(SWindowResInfo* pWindowResInfo, int32_t slot); +void closeAllTimeWindow(SWindowResInfo* pWindowResInfo); +void removeRedundantWindow(SWindowResInfo *pWindowResInfo, TSKEY lastKey, int32_t order); +SWindowResult *getWindowResult(SWindowResInfo *pWindowResInfo, int32_t slot); +int32_t curTimeWindow(SWindowResInfo *pWindowResInfo); +bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot); + +void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo); + +#endif // TDENGINE_QUERYUTIL_H diff --git a/src/query/inc/sql.y b/src/query/inc/sql.y index 2b7e0b628cb0cf951db287722036523749e0fdd8..585aa8a9cf8a5166e00ec23b9782bb979c671634 100644 --- a/src/query/inc/sql.y +++ b/src/query/inc/sql.y @@ -26,7 +26,6 @@ #include #include #include -#include "tscSQLParser.h" #include "tutil.h" } diff --git a/src/query/inc/tcache.h b/src/query/inc/tcache.h deleted file mode 100644 index b577c53ea8dbcdc9f069288b94d0244907e77f12..0000000000000000000000000000000000000000 --- a/src/query/inc/tcache.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. 
- * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#ifndef TDENGINE_TCACHE_H -#define TDENGINE_TCACHE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/** - * - * @param maxSessions maximum slots available for hash elements - * @param tmrCtrl timer ctrl - * @param refreshTime refresh operation interval time, the maximum survival time when one element is expired and - * not referenced by other objects - * @return - */ -void *taosInitDataCache(int maxSessions, void *tmrCtrl, int64_t refreshTimeInSeconds); - -/** - * add data into cache - * - * @param handle cache object - * @param key key - * @param pData cached data - * @param dataSize data size - * @param keepTime survival time in second - * @return cached element - */ -void *taosAddDataIntoCache(void *handle, char *key, char *pData, int dataSize, int keepTimeInSeconds); - -/** - * remove data in cache, the data will not be removed immediately. 
- * if it is referenced by other object, it will be remain in cache - * @param handle cache object - * @param data not the key, actually referenced data - * @param _remove force model, reduce the ref count and move the data into - * pTrash - */ -void taosRemoveDataFromCache(void *handle, void **data, bool _remove); - -/** - * update data in cache - * @param handle hash object handle(pointer) - * @param key key for hash - * @param pData actually data - * @param size length of data - * @param duration survival time of this object in cache - * @return new referenced data - */ -void *taosUpdateDataFromCache(void *handle, char *key, char *pData, int size, int duration); - -/** - * get data from cache - * @param handle cache object - * @param key key - * @return cached data or NULL - */ -void *taosGetDataFromCache(void *handle, char *key); - -/** - * release all allocated memory and destroy the cache object - * - * @param handle - */ -void taosCleanUpDataCache(void *handle); - -/** - * move all data node into trash,clear node in trash can if it is not referenced by client - * @param handle - */ -void taosClearDataCache(void *handle); - -/** - * Add one reference count for the exist data, and assign this data for a new owner. - * The new owner needs to invoke the taosRemoveDataFromCache when it does not need this data anymore. - * This procedure is a faster version of taosGetDataFromCache function, which avoids the sideeffect of the problem of the - * data is moved to trash, and taosGetDataFromCache will fail to retrieve it again. - * - * @param handle - * @param data - * @return - */ -void* taosGetDataFromExists(void* handle, void* data); - -/** - * transfer the ownership of data in cache to another object without increasing reference count. 
- * @param handle - * @param data - * @return - */ -void* taosTransferDataInCache(void* handle, void** data); - -#ifdef __cplusplus -} -#endif - -#endif // TDENGINE_TCACHE_H diff --git a/src/query/inc/tsqlfunction.h b/src/query/inc/tsqlfunction.h index 27bce71300ccf17743eaa50d64a975b624bde5d2..04ff9e56f8238ca393ed50a4e975191d158517dd 100644 --- a/src/query/inc/tsqlfunction.h +++ b/src/query/inc/tsqlfunction.h @@ -20,11 +20,11 @@ extern "C" { #endif -#include -#include +#include "os.h" -#include "trpc.h" +#include "../../common/inc/name.h" #include "taosdef.h" +#include "trpc.h" #include "tvariant.h" #define TSDB_FUNC_INVALID_ID -1 @@ -130,12 +130,8 @@ typedef struct SArithmeticSupport { typedef struct SQLPreAggVal { bool isSet; - int32_t numOfNull; - int64_t sum; - int64_t max; - int64_t min; - int16_t maxIndex; - int16_t minIndex; + int32_t size; + SDataStatis statis; } SQLPreAggVal; typedef struct SInterpInfoDetail { diff --git a/src/query/src/tscAst.c b/src/query/src/qast.c similarity index 98% rename from src/query/src/tscAst.c rename to src/query/src/qast.c index 7be583a5ea1c59bae4065185e39e22b2d0d75dd1..a0cbf121691c5e341964091038d233b40f1c3c58 100644 --- a/src/query/src/tscAst.c +++ b/src/query/src/qast.c @@ -13,20 +13,17 @@ * along with this program. If not, see . 
*/ +#include "qast.h" #include "os.h" -#include "sskiplist.h" +#include "qsqlparser.h" +#include "qsyntaxtreefunction.h" #include "taosdef.h" #include "taosmsg.h" -#include "tast.h" #include "tlog.h" -#include "tscSyntaxtreefunction.h" -#include "tschemautil.h" #include "tsqlfunction.h" #include "tstoken.h" #include "ttokendef.h" -#include "taosdef.h" #include "tutil.h" -#include "qsqlparser.h" /* * @@ -610,6 +607,7 @@ int32_t merge(tQueryResultset *pLeft, tQueryResultset *pRight, tQueryResultset * // } // // return pFinalRes->num; + return 0; } int32_t intersect(tQueryResultset *pLeft, tQueryResultset *pRight, tQueryResultset *pFinalRes) { @@ -644,12 +642,13 @@ int32_t intersect(tQueryResultset *pLeft, tQueryResultset *pRight, tQueryResults // } // // return pFinalRes->num; + return 0; } /* * traverse the result and apply the function to each item to check if the item is qualified or not */ -static void tSQLListTraverseOnResult(struct tSQLBinaryExpr *pExpr, __result_filter_fn_t fp, tQueryResultset *pResult) { +static UNUSED_FUNC void tSQLListTraverseOnResult(struct tSQLBinaryExpr *pExpr, __result_filter_fn_t fp, tQueryResultset *pResult) { assert(pExpr->pLeft->nodeType == TSQL_NODE_COL && pExpr->pRight->nodeType == TSQL_NODE_VALUE); // brutal force scan the result list and check for each item in the list @@ -706,7 +705,7 @@ static bool filterItem(tSQLBinaryExpr *pExpr, const void *pItem, SBinaryFilterSu * @param pSchema tag schemas * @param fp filter callback function */ -static void tSQLBinaryTraverseOnResult(tSQLBinaryExpr *pExpr, tQueryResultset *pResult, SBinaryFilterSupp *param) { +static UNUSED_FUNC void tSQLBinaryTraverseOnResult(tSQLBinaryExpr *pExpr, tQueryResultset *pResult, SBinaryFilterSupp *param) { int32_t n = 0; for (int32_t i = 0; i < pResult->num; ++i) { void *pItem = pResult->pRes[i]; diff --git a/src/query/src/textbuffer.c b/src/query/src/qextbuffer.c similarity index 98% rename from src/query/src/textbuffer.c rename to 
src/query/src/qextbuffer.c index 056fe808588e673126081355ca31020f672ab368..d71d5669996ef306b7185d899c3eab52f893aa61 100644 --- a/src/query/src/textbuffer.c +++ b/src/query/src/qextbuffer.c @@ -12,42 +12,19 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ +#include "qextbuffer.h" #include "os.h" #include "taos.h" +#include "taosdef.h" #include "taosmsg.h" -#include "textbuffer.h" #include "tlog.h" #include "tsqlfunction.h" #include "ttime.h" -#include "taosdef.h" #include "tutil.h" #define COLMODEL_GET_VAL(data, schema, allrow, rowId, colId) \ (data + (schema)->pFields[colId].offset * (allrow) + (rowId) * (schema)->pFields[colId].field.bytes) -int32_t tmpFileSerialNum = 0; - -void getTmpfilePath(const char *fileNamePrefix, char *dstPath) { - const char* tdengineTmpFileNamePrefix = "tdengine-"; - - char tmpPath[MAX_TMPFILE_PATH_LENGTH] = {0}; - -#ifdef WINDOWS - char *tmpDir = getenv("tmp"); - if (tmpDir == NULL) { - tmpDir = ""; - } -#else - char *tmpDir = "/tmp/"; -#endif - - strcpy(tmpPath, tmpDir); - strcat(tmpPath, tdengineTmpFileNamePrefix); - strcat(tmpPath, fileNamePrefix); - strcat(tmpPath, "-%llu-%u"); - snprintf(dstPath, MAX_TMPFILE_PATH_LENGTH, tmpPath, taosGetPthreadId(), atomic_add_fetch_32(&tmpFileSerialNum, 1)); -} - /* * SColumnModel is deeply copy */ diff --git a/src/query/src/thistogram.c b/src/query/src/qhistogram.c similarity index 99% rename from src/query/src/thistogram.c rename to src/query/src/qhistogram.c index 31045a4957be59d924eb70fb0876eefb6715aaa2..26482e9f142728f0b097511f98ca2e297e34ef7b 100644 --- a/src/query/src/thistogram.c +++ b/src/query/src/qhistogram.c @@ -14,10 +14,10 @@ */ #include "os.h" +#include "qhistogram.h" +#include "taosdef.h" #include "taosmsg.h" -#include "thistogram.h" #include "tlosertree.h" -#include "taosdef.h" /** * diff --git a/src/query/src/tinterpolation.c b/src/query/src/qinterpolation.c similarity index 99% rename from 
src/query/src/tinterpolation.c rename to src/query/src/qinterpolation.c index 1a9da44788f5c9bc5a066bd927beaefd0f58ea34..1731e16ed82703753c59d2a8f34411b39eba8c3b 100644 --- a/src/query/src/tinterpolation.c +++ b/src/query/src/qinterpolation.c @@ -13,12 +13,12 @@ * along with this program. If not, see . */ +#include "qinterpolation.h" #include "os.h" +#include "qextbuffer.h" +#include "taosdef.h" #include "taosmsg.h" -#include "textbuffer.h" -#include "tinterpolation.h" #include "tsqlfunction.h" -#include "taosdef.h" #define INTERPOL_IS_ASC_INTERPOL(interp) ((interp)->order == TSQL_SO_ASC) diff --git a/src/query/src/tscSQLParserImpl.c b/src/query/src/qparserImpl.c similarity index 100% rename from src/query/src/tscSQLParserImpl.c rename to src/query/src/qparserImpl.c diff --git a/src/query/src/tpercentile.c b/src/query/src/qpercentile.c similarity index 99% rename from src/query/src/tpercentile.c rename to src/query/src/qpercentile.c index 6e1c28c516ce08e233b7c9afa418e571bd94e051..3b12dee0538200903ae19f2a364ea2b75543a1ae 100644 --- a/src/query/src/tpercentile.c +++ b/src/query/src/qpercentile.c @@ -15,11 +15,10 @@ #include "os.h" -#include "taosmsg.h" +#include "qpercentile.h" #include "taosdef.h" +#include "taosmsg.h" #include "tlog.h" -#include "taosdef.h" -#include "tpercentile.h" tExtMemBuffer *releaseBucketsExceptFor(tMemBucket *pMemBucket, int16_t segIdx, int16_t slotIdx) { tExtMemBuffer *pBuffer = NULL; diff --git a/src/query/src/tresultBuf.c b/src/query/src/qresultBuf.c similarity index 76% rename from src/query/src/tresultBuf.c rename to src/query/src/qresultBuf.c index a7377f16575147934f68148adb2d16126288ffc9..07fd5f11e3c2da455f8639cc69ba0f74af2fc18d 100644 --- a/src/query/src/tresultBuf.c +++ b/src/query/src/qresultBuf.c @@ -1,14 +1,14 @@ +#include "qresultBuf.h" #include "hash.h" +#include "qextbuffer.h" #include "taoserror.h" -#include "textbuffer.h" #include "tlog.h" #include "tsqlfunction.h" -#include "tresultBuf.h" #define DEFAULT_INTERN_BUF_SIZE 
16384L -int32_t createDiskbasedResultBuffer(SQueryDiskbasedResultBuf** pResultBuf, int32_t size, int32_t rowSize) { - SQueryDiskbasedResultBuf* pResBuf = calloc(1, sizeof(SQueryDiskbasedResultBuf)); +int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t size, int32_t rowSize) { + SDiskbasedResultBuf* pResBuf = calloc(1, sizeof(SDiskbasedResultBuf)); pResBuf->numOfRowsPerPage = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / rowSize; pResBuf->numOfPages = size; @@ -16,7 +16,7 @@ int32_t createDiskbasedResultBuffer(SQueryDiskbasedResultBuf** pResultBuf, int32 pResBuf->incStep = 4; // init id hash table - pResBuf->idsTable = taosInitHashTable(size, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); + pResBuf->idsTable = taosHashInit(size, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); pResBuf->list = calloc(size, sizeof(SIDList)); pResBuf->numOfAllocGroupIds = size; @@ -50,17 +50,17 @@ int32_t createDiskbasedResultBuffer(SQueryDiskbasedResultBuf** pResultBuf, int32 return TSDB_CODE_SUCCESS; } -tFilePage* getResultBufferPageById(SQueryDiskbasedResultBuf* pResultBuf, int32_t id) { +tFilePage* getResultBufferPageById(SDiskbasedResultBuf* pResultBuf, int32_t id) { assert(id < pResultBuf->numOfPages && id >= 0); return (tFilePage*)(pResultBuf->pBuf + DEFAULT_INTERN_BUF_SIZE * id); } -int32_t getNumOfResultBufGroupId(SQueryDiskbasedResultBuf* pResultBuf) { return taosNumElemsInHashTable(pResultBuf->idsTable); } +int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->idsTable); } -int32_t getResBufSize(SQueryDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; } +int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; } -static int32_t extendDiskFileSize(SQueryDiskbasedResultBuf* pResultBuf, int32_t numOfPages) { +static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t numOfPages) { assert(pResultBuf->numOfPages * 
DEFAULT_INTERN_BUF_SIZE == pResultBuf->totalBufSize); int32_t ret = munmap(pResultBuf->pBuf, pResultBuf->totalBufSize); @@ -88,14 +88,14 @@ static int32_t extendDiskFileSize(SQueryDiskbasedResultBuf* pResultBuf, int32_t return TSDB_CODE_SUCCESS; } -static bool noMoreAvailablePages(SQueryDiskbasedResultBuf* pResultBuf) { +static bool noMoreAvailablePages(SDiskbasedResultBuf* pResultBuf) { return (pResultBuf->allocateId == pResultBuf->numOfPages - 1); } -static int32_t getGroupIndex(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId) { +static int32_t getGroupIndex(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { assert(pResultBuf != NULL); - char* p = taosGetDataFromHashTable(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); + char* p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); if (p == NULL) { // it is a new group id return -1; } @@ -106,7 +106,7 @@ static int32_t getGroupIndex(SQueryDiskbasedResultBuf* pResultBuf, int32_t group return slot; } -static int32_t addNewGroupId(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId) { +static int32_t addNewGroupId(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { int32_t num = getNumOfResultBufGroupId(pResultBuf); // the num is the newest allocated group id slot if (pResultBuf->numOfAllocGroupIds <= num) { @@ -121,7 +121,7 @@ static int32_t addNewGroupId(SQueryDiskbasedResultBuf* pResultBuf, int32_t group pResultBuf->numOfAllocGroupIds = n; } - taosAddToHashTable(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &num, sizeof(int32_t)); + taosHashPut(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &num, sizeof(int32_t)); return num; } @@ -148,7 +148,7 @@ static int32_t doRegisterId(SIDList* pList, int32_t id) { return 0; } -static void registerPageId(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { +static void registerPageId(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { int32_t slot = 
getGroupIndex(pResultBuf, groupId); if (slot < 0) { slot = addNewGroupId(pResultBuf, groupId); @@ -158,7 +158,7 @@ static void registerPageId(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId doRegisterId(pList, pageId); } -tFilePage* getNewDataBuf(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { +tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { if (noMoreAvailablePages(pResultBuf)) { if (extendDiskFileSize(pResultBuf, pResultBuf->incStep) != TSDB_CODE_SUCCESS) { return NULL; @@ -177,9 +177,9 @@ tFilePage* getNewDataBuf(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId, return page; } -int32_t getNumOfRowsPerPage(SQueryDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } +int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } -SIDList getDataBufPagesIdList(SQueryDiskbasedResultBuf* pResultBuf, int32_t groupId) { +SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { SIDList list = {0}; int32_t slot = getGroupIndex(pResultBuf, groupId); if (slot < 0) { @@ -189,7 +189,7 @@ SIDList getDataBufPagesIdList(SQueryDiskbasedResultBuf* pResultBuf, int32_t grou } } -void destroyResultBuf(SQueryDiskbasedResultBuf* pResultBuf) { +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) { if (pResultBuf == NULL) { return; } @@ -210,7 +210,7 @@ void destroyResultBuf(SQueryDiskbasedResultBuf* pResultBuf) { } tfree(pResultBuf->list); - taosCleanUpHashTable(pResultBuf->idsTable); + taosHashCleanup(pResultBuf->idsTable); tfree(pResultBuf); } diff --git a/src/query/src/tscSyntaxtreefunction.c b/src/query/src/qsyntaxtreefunction.c similarity index 99% rename from src/query/src/tscSyntaxtreefunction.c rename to src/query/src/qsyntaxtreefunction.c index e3c00ed59d1da9ee6e042f748a6bf6f586f8bd2f..d21f7dab736b1b14579f3b207da89695e93a1077 100644 --- a/src/query/src/tscSyntaxtreefunction.c +++ 
b/src/query/src/qsyntaxtreefunction.c @@ -15,7 +15,7 @@ #include "os.h" -#include "tscSyntaxtreefunction.h" +#include "qsyntaxtreefunction.h" #include "taosdef.h" #include "tutil.h" diff --git a/src/query/src/ttokenizer.c b/src/query/src/qtokenizer.c similarity index 98% rename from src/query/src/ttokenizer.c rename to src/query/src/qtokenizer.c index f43d344ca3aeb82b45c15d6eea399f22c12392c1..61d2e59c87fa2dc136d8e51869c10fd6028a9127 100644 --- a/src/query/src/ttokenizer.c +++ b/src/query/src/qtokenizer.c @@ -14,12 +14,12 @@ */ #include "hash.h" -#include "hashutil.h" +#include "hashfunc.h" #include "os.h" #include "shash.h" +#include "taosdef.h" #include "tstoken.h" #include "ttokendef.h" -#include "taosdef.h" #include "tutil.h" // All the keywords of the SQL language are stored in a hash table @@ -253,11 +253,11 @@ static void* KeywordHashTable = NULL; static void doInitKeywordsTable() { int numOfEntries = tListLen(keywordTable); - KeywordHashTable = taosInitHashTable(numOfEntries, MurmurHash3_32, false); + KeywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, false); for (int32_t i = 0; i < numOfEntries; i++) { keywordTable[i].len = strlen(keywordTable[i].name); void* ptr = &keywordTable[i]; - taosAddToHashTable(KeywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES); + taosHashPut(KeywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES); } } @@ -275,7 +275,7 @@ int tSQLKeywordCode(const char* z, int n) { } } - SKeyword** pKey = (SKeyword**)taosGetDataFromHashTable(KeywordHashTable, key, n); + SKeyword** pKey = (SKeyword**)taosHashGet(KeywordHashTable, key, n); if (pKey != NULL) { return (*pKey)->type; } else { diff --git a/src/query/src/qtsbuf.c b/src/query/src/qtsbuf.c new file mode 100644 index 0000000000000000000000000000000000000000..ea6e6dfdc0f6db5f18b2254887df028f75233fa6 --- /dev/null +++ b/src/query/src/qtsbuf.c @@ -0,0 +1,923 @@ +#include "qtsbuf.h" +#include "tscompression.h" 
+#include "tutil.h" +#include "taoserror.h" + +static int32_t getDataStartOffset(); +static void TSBufUpdateVnodeInfo(STSBuf* pTSBuf, int32_t index, STSVnodeBlockInfo* pBlockInfo); +static STSBuf* allocResForTSBuf(STSBuf* pTSBuf); +static int32_t STSBufUpdateHeader(STSBuf* pTSBuf, STSBufFileHeader* pHeader); + + +/** + * todo error handling + * support auto closeable tmp file + * @param path + * @return + */ +STSBuf* tsBufCreate(bool autoDelete) { + STSBuf* pTSBuf = calloc(1, sizeof(STSBuf)); + if (pTSBuf == NULL) { + return NULL; + } + + getTmpfilePath("join", pTSBuf->path); + pTSBuf->f = fopen(pTSBuf->path, "w+"); + if (pTSBuf->f == NULL) { + free(pTSBuf); + return NULL; + } + + if (NULL == allocResForTSBuf(pTSBuf)) { + return NULL; + } + + // update the header info + STSBufFileHeader header = {.magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = TSQL_SO_ASC}; + STSBufUpdateHeader(pTSBuf, &header); + + tsBufResetPos(pTSBuf); + pTSBuf->cur.order = TSQL_SO_ASC; + + pTSBuf->autoDelete = autoDelete; + pTSBuf->tsOrder = -1; + + return pTSBuf; +} + +STSBuf* tsBufCreateFromFile(const char* path, bool autoDelete) { + STSBuf* pTSBuf = calloc(1, sizeof(STSBuf)); + if (pTSBuf == NULL) { + return NULL; + } + + strncpy(pTSBuf->path, path, PATH_MAX); + + pTSBuf->f = fopen(pTSBuf->path, "r+"); + if (pTSBuf->f == NULL) { + free(pTSBuf); + return NULL; + } + + if (allocResForTSBuf(pTSBuf) == NULL) { + return NULL; + } + + // validate the file magic number + STSBufFileHeader header = {0}; + fseek(pTSBuf->f, 0, SEEK_SET); + fread(&header, 1, sizeof(header), pTSBuf->f); + + // invalid file + if (header.magic != TS_COMP_FILE_MAGIC) { + return NULL; + } + + if (header.numOfVnode > pTSBuf->numOfAlloc) { + pTSBuf->numOfAlloc = header.numOfVnode; + STSVnodeBlockInfoEx* tmp = realloc(pTSBuf->pData, sizeof(STSVnodeBlockInfoEx) * pTSBuf->numOfAlloc); + if (tmp == NULL) { + tsBufDestory(pTSBuf); + return NULL; + } + + pTSBuf->pData = tmp; + } + + pTSBuf->numOfVnodes = 
header.numOfVnode; + + // check the ts order + pTSBuf->tsOrder = header.tsOrder; + if (pTSBuf->tsOrder != TSQL_SO_ASC && pTSBuf->tsOrder != TSQL_SO_DESC) { +// tscError("invalid order info in buf:%d", pTSBuf->tsOrder); + tsBufDestory(pTSBuf); + return NULL; + } + + size_t infoSize = sizeof(STSVnodeBlockInfo) * pTSBuf->numOfVnodes; + + STSVnodeBlockInfo* buf = (STSVnodeBlockInfo*)calloc(1, infoSize); + + //int64_t pos = ftell(pTSBuf->f); //pos not used + fread(buf, infoSize, 1, pTSBuf->f); + + // the length value for each vnode is not kept in file, so does not set the length value + for (int32_t i = 0; i < pTSBuf->numOfVnodes; ++i) { + STSVnodeBlockInfoEx* pBlockList = &pTSBuf->pData[i]; + memcpy(&pBlockList->info, &buf[i], sizeof(STSVnodeBlockInfo)); + } + + free(buf); + + fseek(pTSBuf->f, 0, SEEK_END); + + struct stat fileStat; + fstat(fileno(pTSBuf->f), &fileStat); + + pTSBuf->fileSize = (uint32_t)fileStat.st_size; + tsBufResetPos(pTSBuf); + + // ascending by default + pTSBuf->cur.order = TSQL_SO_ASC; + + pTSBuf->autoDelete = autoDelete; + +// tscTrace("create tsBuf from file:%s, fd:%d, size:%d, numOfVnode:%d, autoDelete:%d", pTSBuf->path, fileno(pTSBuf->f), +// pTSBuf->fileSize, pTSBuf->numOfVnodes, pTSBuf->autoDelete); + + return pTSBuf; +} + +void* tsBufDestory(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return NULL; + } + + tfree(pTSBuf->assistBuf); + tfree(pTSBuf->tsData.rawBuf); + + tfree(pTSBuf->pData); + tfree(pTSBuf->block.payload); + + fclose(pTSBuf->f); + + if (pTSBuf->autoDelete) { +// ("tsBuf %p destroyed, delete tmp file:%s", pTSBuf, pTSBuf->path); + unlink(pTSBuf->path); + } else { +// tscTrace("tsBuf %p destroyed, tmp file:%s, remains", pTSBuf, pTSBuf->path); + } + + free(pTSBuf); + return NULL; +} + +static STSVnodeBlockInfoEx* tsBufGetLastVnodeInfo(STSBuf* pTSBuf) { + int32_t last = pTSBuf->numOfVnodes - 1; + + assert(last >= 0); + return &pTSBuf->pData[last]; +} + +static STSVnodeBlockInfoEx* addOneVnodeInfo(STSBuf* pTSBuf, int32_t vnodeId) { + 
if (pTSBuf->numOfAlloc <= pTSBuf->numOfVnodes) { + uint32_t newSize = (uint32_t)(pTSBuf->numOfAlloc * 1.5); + assert(newSize > pTSBuf->numOfAlloc); + + STSVnodeBlockInfoEx* tmp = (STSVnodeBlockInfoEx*)realloc(pTSBuf->pData, sizeof(STSVnodeBlockInfoEx) * newSize); + if (tmp == NULL) { + return NULL; + } + + pTSBuf->pData = tmp; + pTSBuf->numOfAlloc = newSize; + memset(&pTSBuf->pData[pTSBuf->numOfVnodes], 0, sizeof(STSVnodeBlockInfoEx) * (newSize - pTSBuf->numOfVnodes)); + } + + if (pTSBuf->numOfVnodes > 0) { + STSVnodeBlockInfoEx* pPrevBlockInfoEx = tsBufGetLastVnodeInfo(pTSBuf); + + // update prev vnode length info in file + TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes - 1, &pPrevBlockInfoEx->info); + } + + // set initial value for vnode block + STSVnodeBlockInfo* pBlockInfo = &pTSBuf->pData[pTSBuf->numOfVnodes].info; + pBlockInfo->vnode = vnodeId; + pBlockInfo->offset = pTSBuf->fileSize; + assert(pBlockInfo->offset >= getDataStartOffset()); + + // update vnode info in file + TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes, pBlockInfo); + + // add one vnode info + pTSBuf->numOfVnodes += 1; + + // update the header info + STSBufFileHeader header = { + .magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = pTSBuf->tsOrder}; + + STSBufUpdateHeader(pTSBuf, &header); + return tsBufGetLastVnodeInfo(pTSBuf); +} + +static void shrinkBuffer(STSList* ptsData) { + // shrink tmp buffer size if it consumes too many memory compared to the pre-defined size + if (ptsData->allocSize >= ptsData->threshold * 2) { + ptsData->rawBuf = realloc(ptsData->rawBuf, MEM_BUF_SIZE); + ptsData->allocSize = MEM_BUF_SIZE; + } +} + +static void writeDataToDisk(STSBuf* pTSBuf) { + if (pTSBuf->tsData.len == 0) { + return; + } + + STSBlock* pBlock = &pTSBuf->block; + + pBlock->numOfElem = pTSBuf->tsData.len / TSDB_KEYSIZE; + pBlock->compLen = + tsCompressTimestamp(pTSBuf->tsData.rawBuf, pTSBuf->tsData.len, pTSBuf->tsData.len / TSDB_KEYSIZE, pBlock->payload, + 
pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); + + int64_t r = fseek(pTSBuf->f, pTSBuf->fileSize, SEEK_SET); + UNUSED(r); + + /* + * format for output data: + * 1. tags, number of ts, size after compressed, payload, size after compressed + * 2. tags, number of ts, size after compressed, payload, size after compressed + * + * both side has the compressed length is used to support load data forwards/backwords. + */ + fwrite(&pBlock->tag, sizeof(pBlock->tag), 1, pTSBuf->f); + fwrite(&pBlock->numOfElem, sizeof(pBlock->numOfElem), 1, pTSBuf->f); + + fwrite(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); + + fwrite(pBlock->payload, (size_t)pBlock->compLen, 1, pTSBuf->f); + + fwrite(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); + + int32_t blockSize = sizeof(pBlock->tag) + sizeof(pBlock->numOfElem) + sizeof(pBlock->compLen) * 2 + pBlock->compLen; + pTSBuf->fileSize += blockSize; + + pTSBuf->tsData.len = 0; + + STSVnodeBlockInfoEx* pVnodeBlockInfoEx = tsBufGetLastVnodeInfo(pTSBuf); + + pVnodeBlockInfoEx->info.compLen += blockSize; + pVnodeBlockInfoEx->info.numOfBlocks += 1; + + shrinkBuffer(&pTSBuf->tsData); +} + +static void expandBuffer(STSList* ptsData, int32_t inputSize) { + if (ptsData->allocSize - ptsData->len < inputSize) { + int32_t newSize = inputSize + ptsData->len; + char* tmp = realloc(ptsData->rawBuf, (size_t)newSize); + if (tmp == NULL) { + // todo + } + + ptsData->rawBuf = tmp; + ptsData->allocSize = newSize; + } +} + +STSBlock* readDataFromDisk(STSBuf* pTSBuf, int32_t order, bool decomp) { + STSBlock* pBlock = &pTSBuf->block; + + // clear the memory buffer + void* tmp = pBlock->payload; + memset(pBlock, 0, sizeof(STSBlock)); + pBlock->payload = tmp; + + if (order == TSQL_SO_DESC) { + /* + * set the right position for the reversed traverse, the reversed traverse is started from + * the end of each comp data block + */ + fseek(pTSBuf->f, -sizeof(pBlock->padding), SEEK_CUR); + fread(&pBlock->padding, 
sizeof(pBlock->padding), 1, pTSBuf->f); + + pBlock->compLen = pBlock->padding; + int32_t offset = pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + sizeof(pBlock->tag); + fseek(pTSBuf->f, -offset, SEEK_CUR); + } + + fread(&pBlock->tag, sizeof(pBlock->tag), 1, pTSBuf->f); + fread(&pBlock->numOfElem, sizeof(pBlock->numOfElem), 1, pTSBuf->f); + + fread(&pBlock->compLen, sizeof(pBlock->compLen), 1, pTSBuf->f); + fread(pBlock->payload, (size_t)pBlock->compLen, 1, pTSBuf->f); + + if (decomp) { + pTSBuf->tsData.len = + tsDecompressTimestamp(pBlock->payload, pBlock->compLen, pBlock->numOfElem, pTSBuf->tsData.rawBuf, + pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); + } + + // read the comp length at the length of comp block + fread(&pBlock->padding, sizeof(pBlock->padding), 1, pTSBuf->f); + + // for backwards traverse, set the start position at the end of previous block + if (order == TSQL_SO_DESC) { + int32_t offset = pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + sizeof(pBlock->tag); + int64_t r = fseek(pTSBuf->f, -offset, SEEK_CUR); + UNUSED(r); + } + + return pBlock; +} + +// set the order of ts buffer if the ts order has not been set yet +static int32_t setCheckTSOrder(STSBuf* pTSBuf, const char* pData, int32_t len) { + STSList* ptsData = &pTSBuf->tsData; + + if (pTSBuf->tsOrder == -1) { + if (ptsData->len > 0) { + TSKEY lastKey = *(TSKEY*)(ptsData->rawBuf + ptsData->len - TSDB_KEYSIZE); + + if (lastKey > *(TSKEY*)pData) { + pTSBuf->tsOrder = TSQL_SO_DESC; + } else { + pTSBuf->tsOrder = TSQL_SO_ASC; + } + } else if (len > TSDB_KEYSIZE) { + // no data in current vnode, more than one ts is added, check the orders + TSKEY k1 = *(TSKEY*)(pData); + TSKEY k2 = *(TSKEY*)(pData + TSDB_KEYSIZE); + + if (k1 < k2) { + pTSBuf->tsOrder = TSQL_SO_ASC; + } else if (k1 > k2) { + pTSBuf->tsOrder = TSQL_SO_DESC; + } else { + // todo handle error + } + } + } else { + // todo the timestamp order is set, 
check the asc/desc order of appended data + } + + return TSDB_CODE_SUCCESS; +} + +void tsBufAppend(STSBuf* pTSBuf, int32_t vnodeId, int64_t tag, const char* pData, int32_t len) { + STSVnodeBlockInfoEx* pBlockInfo = NULL; + STSList* ptsData = &pTSBuf->tsData; + + if (pTSBuf->numOfVnodes == 0 || tsBufGetLastVnodeInfo(pTSBuf)->info.vnode != vnodeId) { + writeDataToDisk(pTSBuf); + shrinkBuffer(ptsData); + + pBlockInfo = addOneVnodeInfo(pTSBuf, vnodeId); + } else { + pBlockInfo = tsBufGetLastVnodeInfo(pTSBuf); + } + + assert(pBlockInfo->info.vnode == vnodeId); + + if (pTSBuf->block.tag != tag && ptsData->len > 0) { + // new arrived data with different tags value, save current value into disk first + writeDataToDisk(pTSBuf); + } else { + expandBuffer(ptsData, len); + } + + pTSBuf->block.tag = tag; + memcpy(ptsData->rawBuf + ptsData->len, pData, (size_t)len); + + // todo check return value + setCheckTSOrder(pTSBuf, pData, len); + + ptsData->len += len; + pBlockInfo->len += len; + + pTSBuf->numOfTotal += len / TSDB_KEYSIZE; + + // the size of raw data exceeds the size of the default prepared buffer, so + // during getBufBlock, the output buffer needs to be large enough. 
+ if (ptsData->len >= ptsData->threshold) { + writeDataToDisk(pTSBuf); + shrinkBuffer(ptsData); + } + + tsBufResetPos(pTSBuf); +} + +void tsBufFlush(STSBuf* pTSBuf) { + if (pTSBuf->tsData.len <= 0) { + return; + } + + writeDataToDisk(pTSBuf); + shrinkBuffer(&pTSBuf->tsData); + + STSVnodeBlockInfoEx* pBlockInfoEx = tsBufGetLastVnodeInfo(pTSBuf); + + // update prev vnode length info in file + TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes - 1, &pBlockInfoEx->info); + + // save the ts order into header + STSBufFileHeader header = { + .magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = pTSBuf->tsOrder}; + STSBufUpdateHeader(pTSBuf, &header); + + fsync(fileno(pTSBuf->f)); +} + +static int32_t tsBufFindVnodeIndexFromId(STSVnodeBlockInfoEx* pVnodeInfoEx, int32_t numOfVnodes, int32_t vnodeId) { + int32_t j = -1; + for (int32_t i = 0; i < numOfVnodes; ++i) { + if (pVnodeInfoEx[i].info.vnode == vnodeId) { + j = i; + break; + } + } + + return j; +} + +// todo opt performance by cache blocks info +static int32_t tsBufFindBlock(STSBuf* pTSBuf, STSVnodeBlockInfo* pBlockInfo, int32_t blockIndex) { + if (fseek(pTSBuf->f, pBlockInfo->offset, SEEK_SET) != 0) { + return -1; + } + + // sequentially read the compressed data blocks, start from the beginning of the comp data block of this vnode + int32_t i = 0; + bool decomp = false; + + while ((i++) <= blockIndex) { + if (readDataFromDisk(pTSBuf, TSQL_SO_ASC, decomp) == NULL) { + return -1; + } + } + + // set the file position to be the end of previous comp block + if (pTSBuf->cur.order == TSQL_SO_DESC) { + STSBlock* pBlock = &pTSBuf->block; + int32_t compBlockSize = + pBlock->compLen + sizeof(pBlock->compLen) * 2 + sizeof(pBlock->numOfElem) + sizeof(pBlock->tag); + fseek(pTSBuf->f, -compBlockSize, SEEK_CUR); + } + + return 0; +} + +static int32_t tsBufFindBlockByTag(STSBuf* pTSBuf, STSVnodeBlockInfo* pBlockInfo, int64_t tag) { + bool decomp = false; + + int64_t offset = 0; + if (pTSBuf->cur.order == 
TSQL_SO_ASC) { + offset = pBlockInfo->offset; + } else { // reversed traverse starts from the end of block + offset = pBlockInfo->offset + pBlockInfo->compLen; + } + + if (fseek(pTSBuf->f, offset, SEEK_SET) != 0) { + return -1; + } + + for (int32_t i = 0; i < pBlockInfo->numOfBlocks; ++i) { + if (readDataFromDisk(pTSBuf, pTSBuf->cur.order, decomp) == NULL) { + return -1; + } + + if (pTSBuf->block.tag == tag) { + return i; + } + } + + return -1; +} + +static void tsBufGetBlock(STSBuf* pTSBuf, int32_t vnodeIndex, int32_t blockIndex) { + STSVnodeBlockInfo* pBlockInfo = &pTSBuf->pData[vnodeIndex].info; + if (pBlockInfo->numOfBlocks <= blockIndex) { + assert(false); + } + + STSCursor* pCur = &pTSBuf->cur; + if (pCur->vnodeIndex == vnodeIndex && ((pCur->blockIndex <= blockIndex && pCur->order == TSQL_SO_ASC) || + (pCur->blockIndex >= blockIndex && pCur->order == TSQL_SO_DESC))) { + int32_t i = 0; + bool decomp = false; + int32_t step = abs(blockIndex - pCur->blockIndex); + + while ((++i) <= step) { + if (readDataFromDisk(pTSBuf, pCur->order, decomp) == NULL) { + return; + } + } + } else { + if (tsBufFindBlock(pTSBuf, pBlockInfo, blockIndex) == -1) { + assert(false); + } + } + + STSBlock* pBlock = &pTSBuf->block; + + size_t s = pBlock->numOfElem * TSDB_KEYSIZE; + + /* + * In order to accommodate all the qualified data, the actual buffer size for one block with identical tags value + * may exceed the maximum allowed size during *tsBufAppend* function by invoking expandBuffer function + */ + if (s > pTSBuf->tsData.allocSize) { + expandBuffer(&pTSBuf->tsData, s); + } + + pTSBuf->tsData.len = + tsDecompressTimestamp(pBlock->payload, pBlock->compLen, pBlock->numOfElem, pTSBuf->tsData.rawBuf, + pTSBuf->tsData.allocSize, TWO_STAGE_COMP, pTSBuf->assistBuf, pTSBuf->bufSize); + + assert((pTSBuf->tsData.len / TSDB_KEYSIZE == pBlock->numOfElem) && (pTSBuf->tsData.allocSize >= pTSBuf->tsData.len)); + + pCur->vnodeIndex = vnodeIndex; + pCur->blockIndex = blockIndex; + + pCur->tsIndex = 
(pCur->order == TSQL_SO_ASC) ? 0 : pBlock->numOfElem - 1; +} + +STSVnodeBlockInfo* tsBufGetVnodeBlockInfo(STSBuf* pTSBuf, int32_t vnodeId) { + int32_t j = tsBufFindVnodeIndexFromId(pTSBuf->pData, pTSBuf->numOfVnodes, vnodeId); + if (j == -1) { + return NULL; + } + + return &pTSBuf->pData[j].info; +} + +int32_t STSBufUpdateHeader(STSBuf* pTSBuf, STSBufFileHeader* pHeader) { + if ((pTSBuf->f == NULL) || pHeader == NULL || pHeader->numOfVnode < 0 || pHeader->magic != TS_COMP_FILE_MAGIC) { + return -1; + } + + int64_t r = fseek(pTSBuf->f, 0, SEEK_SET); + if (r != 0) { + return -1; + } + + fwrite(pHeader, sizeof(STSBufFileHeader), 1, pTSBuf->f); + return 0; +} + +bool tsBufNextPos(STSBuf* pTSBuf) { + if (pTSBuf == NULL || pTSBuf->numOfVnodes == 0) { + return false; + } + + STSCursor* pCur = &pTSBuf->cur; + + // get the first/last position according to traverse order + if (pCur->vnodeIndex == -1) { + if (pCur->order == TSQL_SO_ASC) { + tsBufGetBlock(pTSBuf, 0, 0); + + if (pTSBuf->block.numOfElem == 0) { // the whole list is empty, return + tsBufResetPos(pTSBuf); + return false; + } else { + return true; + } + + } else { // get the last timestamp record in the last block of the last vnode + assert(pTSBuf->numOfVnodes > 0); + + int32_t vnodeIndex = pTSBuf->numOfVnodes - 1; + pCur->vnodeIndex = vnodeIndex; + + int32_t vnodeId = pTSBuf->pData[pCur->vnodeIndex].info.vnode; + STSVnodeBlockInfo* pBlockInfo = tsBufGetVnodeBlockInfo(pTSBuf, vnodeId); + int32_t blockIndex = pBlockInfo->numOfBlocks - 1; + + tsBufGetBlock(pTSBuf, vnodeIndex, blockIndex); + + pCur->tsIndex = pTSBuf->block.numOfElem - 1; + if (pTSBuf->block.numOfElem == 0) { + tsBufResetPos(pTSBuf); + return false; + } else { + return true; + } + } + } + + int32_t step = pCur->order == TSQL_SO_ASC ? 
1 : -1; + + while (1) { + assert(pTSBuf->tsData.len == pTSBuf->block.numOfElem * TSDB_KEYSIZE); + + if ((pCur->order == TSQL_SO_ASC && pCur->tsIndex >= pTSBuf->block.numOfElem - 1) || + (pCur->order == TSQL_SO_DESC && pCur->tsIndex <= 0)) { + int32_t vnodeId = pTSBuf->pData[pCur->vnodeIndex].info.vnode; + + STSVnodeBlockInfo* pBlockInfo = tsBufGetVnodeBlockInfo(pTSBuf, vnodeId); + if (pBlockInfo == NULL || (pCur->blockIndex >= pBlockInfo->numOfBlocks - 1 && pCur->order == TSQL_SO_ASC) || + (pCur->blockIndex <= 0 && pCur->order == TSQL_SO_DESC)) { + if ((pCur->vnodeIndex >= pTSBuf->numOfVnodes - 1 && pCur->order == TSQL_SO_ASC) || + (pCur->vnodeIndex <= 0 && pCur->order == TSQL_SO_DESC)) { + pCur->vnodeIndex = -1; + return false; + } + + if (pBlockInfo == NULL) { + return false; + } + + int32_t blockIndex = pCur->order == TSQL_SO_ASC ? 0 : pBlockInfo->numOfBlocks - 1; + tsBufGetBlock(pTSBuf, pCur->vnodeIndex + step, blockIndex); + break; + + } else { + tsBufGetBlock(pTSBuf, pCur->vnodeIndex, pCur->blockIndex + step); + break; + } + } else { + pCur->tsIndex += step; + break; + } + } + + return true; +} + +void tsBufResetPos(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return; + } + + pTSBuf->cur = (STSCursor){.tsIndex = -1, .blockIndex = -1, .vnodeIndex = -1, .order = pTSBuf->cur.order}; +} + +STSElem tsBufGetElem(STSBuf* pTSBuf) { + STSElem elem1 = {.vnode = -1}; + STSCursor* pCur = &pTSBuf->cur; + + if (pTSBuf == NULL || pCur->vnodeIndex < 0) { + return elem1; + } + + STSBlock* pBlock = &pTSBuf->block; + + elem1.vnode = pTSBuf->pData[pCur->vnodeIndex].info.vnode; + elem1.ts = *(TSKEY*)(pTSBuf->tsData.rawBuf + pCur->tsIndex * TSDB_KEYSIZE); + elem1.tag = pBlock->tag; + + return elem1; +} + +/** + * current only support ts comp data from two vnode merge + * @param pDestBuf + * @param pSrcBuf + * @param vnodeId + * @return + */ +int32_t tsBufMerge(STSBuf* pDestBuf, const STSBuf* pSrcBuf, int32_t vnodeId) { + if (pDestBuf == NULL || pSrcBuf == NULL || 
pSrcBuf->numOfVnodes <= 0) { + return 0; + } + + if (pDestBuf->numOfVnodes + pSrcBuf->numOfVnodes > TS_COMP_FILE_VNODE_MAX) { + return -1; + } + + // src can only have one vnode index + if (pSrcBuf->numOfVnodes > 1) { + return -1; + } + + // there are data in buffer, flush to disk first + tsBufFlush(pDestBuf); + + // compared with the last vnode id + if (vnodeId != tsBufGetLastVnodeInfo(pDestBuf)->info.vnode) { + int32_t oldSize = pDestBuf->numOfVnodes; + int32_t newSize = oldSize + pSrcBuf->numOfVnodes; + + if (pDestBuf->numOfAlloc < newSize) { + pDestBuf->numOfAlloc = newSize; + + STSVnodeBlockInfoEx* tmp = realloc(pDestBuf->pData, sizeof(STSVnodeBlockInfoEx) * newSize); + if (tmp == NULL) { + return -1; + } + + pDestBuf->pData = tmp; + } + + // directly copy the vnode index information + memcpy(&pDestBuf->pData[oldSize], pSrcBuf->pData, (size_t)pSrcBuf->numOfVnodes * sizeof(STSVnodeBlockInfoEx)); + + // set the new offset value + for (int32_t i = 0; i < pSrcBuf->numOfVnodes; ++i) { + STSVnodeBlockInfoEx* pBlockInfoEx = &pDestBuf->pData[i + oldSize]; + pBlockInfoEx->info.offset = (pSrcBuf->pData[i].info.offset - getDataStartOffset()) + pDestBuf->fileSize; + pBlockInfoEx->info.vnode = vnodeId; + } + + pDestBuf->numOfVnodes = newSize; + } else { + STSVnodeBlockInfoEx* pBlockInfoEx = tsBufGetLastVnodeInfo(pDestBuf); + + pBlockInfoEx->len += pSrcBuf->pData[0].len; + pBlockInfoEx->info.numOfBlocks += pSrcBuf->pData[0].info.numOfBlocks; + pBlockInfoEx->info.compLen += pSrcBuf->pData[0].info.compLen; + pBlockInfoEx->info.vnode = vnodeId; + } + + int32_t r = fseek(pDestBuf->f, 0, SEEK_END); + assert(r == 0); + + int64_t offset = getDataStartOffset(); + int32_t size = pSrcBuf->fileSize - offset; + +#ifdef LINUX + ssize_t rc = tsendfile(fileno(pDestBuf->f), fileno(pSrcBuf->f), &offset, size); +#else + ssize_t rc = fsendfile(pDestBuf->f, pSrcBuf->f, &offset, size); +#endif + + if (rc == -1) { +// tscError("failed to merge tsBuf from:%s to %s, reason:%s\n", pSrcBuf->path, 
pDestBuf->path, strerror(errno)); + return -1; + } + + if (rc != size) { +// tscError("failed to merge tsBuf from:%s to %s, reason:%s\n", pSrcBuf->path, pDestBuf->path, strerror(errno)); + return -1; + } + + pDestBuf->numOfTotal += pSrcBuf->numOfTotal; + + int32_t oldSize = pDestBuf->fileSize; + + struct stat fileStat; + fstat(fileno(pDestBuf->f), &fileStat); + pDestBuf->fileSize = (uint32_t)fileStat.st_size; + + assert(pDestBuf->fileSize == oldSize + size); + +// tscTrace("tsBuf merge success, %p, path:%s, fd:%d, file size:%d, numOfVnode:%d, autoDelete:%d", pDestBuf, +// pDestBuf->path, fileno(pDestBuf->f), pDestBuf->fileSize, pDestBuf->numOfVnodes, pDestBuf->autoDelete); + + return 0; +} + +STSBuf* tsBufCreateFromCompBlocks(const char* pData, int32_t numOfBlocks, int32_t len, int32_t order) { + STSBuf* pTSBuf = tsBufCreate(true); + + STSVnodeBlockInfo* pBlockInfo = &(addOneVnodeInfo(pTSBuf, 0)->info); + pBlockInfo->numOfBlocks = numOfBlocks; + pBlockInfo->compLen = len; + pBlockInfo->offset = getDataStartOffset(); + pBlockInfo->vnode = 0; + + // update prev vnode length info in file + TSBufUpdateVnodeInfo(pTSBuf, pTSBuf->numOfVnodes - 1, pBlockInfo); + + fseek(pTSBuf->f, pBlockInfo->offset, SEEK_SET); + fwrite((void*)pData, 1, len, pTSBuf->f); + pTSBuf->fileSize += len; + + pTSBuf->tsOrder = order; + assert(order == TSQL_SO_ASC || order == TSQL_SO_DESC); + + STSBufFileHeader header = { + .magic = TS_COMP_FILE_MAGIC, .numOfVnode = pTSBuf->numOfVnodes, .tsOrder = pTSBuf->tsOrder}; + STSBufUpdateHeader(pTSBuf, &header); + + fsync(fileno(pTSBuf->f)); + + return pTSBuf; +} + +STSElem tsBufGetElemStartPos(STSBuf* pTSBuf, int32_t vnodeId, int64_t tag) { + STSElem elem = {.vnode = -1}; + + if (pTSBuf == NULL) { + return elem; + } + + int32_t j = tsBufFindVnodeIndexFromId(pTSBuf->pData, pTSBuf->numOfVnodes, vnodeId); + if (j == -1) { + return elem; + } + + // for debug purpose + // tsBufDisplay(pTSBuf); + + STSCursor* pCur = &pTSBuf->cur; + STSVnodeBlockInfo* pBlockInfo = 
&pTSBuf->pData[j].info; + + int32_t blockIndex = tsBufFindBlockByTag(pTSBuf, pBlockInfo, tag); + if (blockIndex < 0) { + return elem; + } + + pCur->vnodeIndex = j; + pCur->blockIndex = blockIndex; + tsBufGetBlock(pTSBuf, j, blockIndex); + + return tsBufGetElem(pTSBuf); +} + +STSCursor tsBufGetCursor(STSBuf* pTSBuf) { + STSCursor c = {.vnodeIndex = -1}; + if (pTSBuf == NULL) { + return c; + } + + return pTSBuf->cur; +} + +void tsBufSetCursor(STSBuf* pTSBuf, STSCursor* pCur) { + if (pTSBuf == NULL || pCur == NULL) { + return; + } + + // assert(pCur->vnodeIndex != -1 && pCur->tsIndex >= 0 && pCur->blockIndex >= 0); + if (pCur->vnodeIndex != -1) { + tsBufGetBlock(pTSBuf, pCur->vnodeIndex, pCur->blockIndex); + } + + pTSBuf->cur = *pCur; +} + +void tsBufSetTraverseOrder(STSBuf* pTSBuf, int32_t order) { + if (pTSBuf == NULL) { + return; + } + + pTSBuf->cur.order = order; +} + +STSBuf* tsBufClone(STSBuf* pTSBuf) { + if (pTSBuf == NULL) { + return NULL; + } + + return tsBufCreateFromFile(pTSBuf->path, false); +} + +void tsBufDisplay(STSBuf* pTSBuf) { + printf("-------start of ts comp file-------\n"); + printf("number of vnode:%d\n", pTSBuf->numOfVnodes); + + int32_t old = pTSBuf->cur.order; + pTSBuf->cur.order = TSQL_SO_ASC; + + tsBufResetPos(pTSBuf); + + while (tsBufNextPos(pTSBuf)) { + STSElem elem = tsBufGetElem(pTSBuf); + printf("%d-%" PRId64 "-%" PRId64 "\n", elem.vnode, *(int64_t*) elem.tag, elem.ts); + } + + pTSBuf->cur.order = old; + printf("-------end of ts comp file-------\n"); +} + +static int32_t getDataStartOffset() { + return sizeof(STSBufFileHeader) + TS_COMP_FILE_VNODE_MAX * sizeof(STSVnodeBlockInfo); +} + +static int32_t doUpdateVnodeInfo(STSBuf* pTSBuf, int64_t offset, STSVnodeBlockInfo* pVInfo) { + if (offset < 0 || offset >= getDataStartOffset()) { + return -1; + } + + if (fseek(pTSBuf->f, offset, SEEK_SET) != 0) { + return -1; + } + + fwrite(pVInfo, sizeof(STSVnodeBlockInfo), 1, pTSBuf->f); + return 0; +} + +// update prev vnode length info in file 
+static void TSBufUpdateVnodeInfo(STSBuf* pTSBuf, int32_t index, STSVnodeBlockInfo* pBlockInfo) { + int32_t offset = sizeof(STSBufFileHeader) + index * sizeof(STSVnodeBlockInfo); + doUpdateVnodeInfo(pTSBuf, offset, pBlockInfo); +} + +static STSBuf* allocResForTSBuf(STSBuf* pTSBuf) { + const int32_t INITIAL_VNODEINFO_SIZE = 4; + + pTSBuf->numOfAlloc = INITIAL_VNODEINFO_SIZE; + pTSBuf->pData = calloc(pTSBuf->numOfAlloc, sizeof(STSVnodeBlockInfoEx)); + if (pTSBuf->pData == NULL) { + tsBufDestory(pTSBuf); + return NULL; + } + + pTSBuf->tsData.rawBuf = malloc(MEM_BUF_SIZE); + if (pTSBuf->tsData.rawBuf == NULL) { + tsBufDestory(pTSBuf); + return NULL; + } + + pTSBuf->bufSize = MEM_BUF_SIZE; + pTSBuf->tsData.threshold = MEM_BUF_SIZE; + pTSBuf->tsData.allocSize = MEM_BUF_SIZE; + + pTSBuf->assistBuf = malloc(MEM_BUF_SIZE); + if (pTSBuf->assistBuf == NULL) { + tsBufDestory(pTSBuf); + return NULL; + } + + pTSBuf->block.payload = malloc(MEM_BUF_SIZE); + if (pTSBuf->block.payload == NULL) { + tsBufDestory(pTSBuf); + return NULL; + } + + pTSBuf->fileSize += getDataStartOffset(); + return pTSBuf; +} \ No newline at end of file diff --git a/src/query/src/queryExecutor.c b/src/query/src/queryExecutor.c new file mode 100644 index 0000000000000000000000000000000000000000..0a5abccdd576710f69419298bc619a806efab143 --- /dev/null +++ b/src/query/src/queryExecutor.c @@ -0,0 +1,6262 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +#include "os.h" + +#include "taosmsg.h" +#include "hash.h" +#include "hashfunc.h" +#include "tlog.h" +#include "tlosertree.h" +#include "tscompression.h" +#include "tstatus.h" +#include "ttime.h" + +#include "qast.h" + +#include "qresultBuf.h" +#include "queryExecutor.h" +#include "queryUtil.h" +#include "tsdb.h" + +#define DEFAULT_INTERN_BUF_SIZE 16384L + +/** + * check if the primary column is load by default, otherwise, the program will + * forced to load primary column explicitly. + */ +#define PRIMARY_TSCOL_LOADED(query) ((query)->colList[0].data.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) + +#define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0) +#define TSDB_COL_IS_TAG(f) (((f)&TSDB_COL_TAG) != 0) +#define QUERY_IS_ASC_QUERY(q) (GET_FORWARD_DIRECTION_FACTOR((q)->order.order) == QUERY_ASC_FORWARD_STEP) + +#define IS_MASTER_SCAN(runtime) (((runtime)->scanFlag & 1u) == MASTER_SCAN) +#define IS_SUPPLEMENT_SCAN(runtime) ((runtime)->scanFlag == SUPPLEMENTARY_SCAN) +#define SET_SUPPLEMENT_SCAN_FLAG(runtime) ((runtime)->scanFlag = SUPPLEMENTARY_SCAN) +#define SET_MASTER_SCAN_FLAG(runtime) ((runtime)->scanFlag = MASTER_SCAN) + +#define GET_QINFO_ADDR(x) ((char *)(x)-offsetof(SQInfo, runtimeEnv)) + +#define GET_COL_DATA_POS(query, index, step) ((query)->pos + (index) * (step)) + +/* get the qinfo struct address from the query struct address */ +#define GET_COLUMN_BYTES(query, colidx) \ + ((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIdxInBuf].info.bytes) +#define GET_COLUMN_TYPE(query, colidx) \ + ((query)->colList[(query)->pSelectExpr[colidx].pBase.colInfo.colIdxInBuf].info.type) + +typedef struct SPointInterpoSupporter { + int32_t numOfCols; + char ** pPrevPoint; + char ** pNextPoint; +} SPointInterpoSupporter; + +typedef enum { + + /* + * the program will call this function again, if this status is set. 
+ * used to transfer from QUERY_RESBUF_FULL + */ + QUERY_NOT_COMPLETED = 0x1u, + + /* + * output buffer is full, so, the next query will be employed, + * in this case, we need to set the appropriated start scan point for + * the next query. + * + * this status is only exist in group-by clause and + * diff/add/division/multiply/ query. + */ + QUERY_RESBUF_FULL = 0x2u, + + /* + * query is over + * 1. this status is used in one row result query process, e.g., + * count/sum/first/last/ + * avg...etc. + * 2. when the query range on timestamp is satisfied, it is also denoted as + * query_compeleted + */ + QUERY_COMPLETED = 0x4u, + + /* + * all data has been scanned, so current search is stopped, + * At last, the function will transfer this status to QUERY_COMPLETED + */ + QUERY_NO_DATA_TO_CHECK = 0x8u, +} vnodeQueryStatus; +
+static void setQueryStatus(SQuery *pQuery, int8_t status);
+
+/** A query is an interval query when it carries a positive interval time. */
+bool isIntervalQuery(SQuery *pQuery) { return pQuery->intervalTime > 0; }
+
+/**
+ * Placeholder: no query context is built yet and both arguments are unused.
+ *
+ * The function is declared to return int32_t, so it must return a value;
+ * falling off the end is undefined behaviour as soon as a caller inspects the
+ * result.
+ * NOTE(review): TSDB_CODE_SUCCESS is assumed to be the right result for the
+ * stub - confirm once the real implementation lands.
+ */
+int32_t setQueryCtxForTableQuery(void *pReadMsg, SQInfo **pQInfo) {
+  (void)pReadMsg;
+  (void)pQInfo;
+  return TSDB_CODE_SUCCESS;
+}
+
+enum { + TS_JOIN_TS_EQUAL = 0, + TS_JOIN_TS_NOT_EQUALS = 1, + TS_JOIN_TAG_NOT_EQUALS = 2, +}; + +static int32_t doMergeMetersResultsToGroupRes(SQInfo *pQInfo, STableDataInfo *pTableDataInfo, int32_t start, + int32_t end); + +static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult); + +static void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo); +static int32_t flushFromResultBuf(SQInfo *pQInfo); +static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId); +static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow); + +static void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void *inputData, char *primaryColumnData, int32_t size, + int32_t functionId, SDataStatis *pStatis, bool hasNull, void *param, int32_t scanFlag); +static void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv); +static void 
destroyMeterQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols); +static int32_t setAdditionalInfo(SQInfo *pQInfo, int32_t meterIdx, STableQueryInfo *pTableQueryInfo); +static void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv); +static bool hasMainOutput(SQuery *pQuery); + +bool getNeighborPoints(SQInfo *pQInfo, void *pMeterObj, SPointInterpoSupporter *pPointInterpSupporter) { +#if 0 + SQueryRuntimeEnv *pRuntimeEnv = &pSupporter->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + if (!isPointInterpoQuery(pQuery)) { + return false; + } + + /* + * for interpolate point query, points that are directly before/after the specified point are required + */ + if (isFirstLastRowQuery(pQuery)) { + assert(!QUERY_IS_ASC_QUERY(pQuery)); + } else { + assert(QUERY_IS_ASC_QUERY(pQuery)); + } + assert(pPointInterpSupporter != NULL && pQuery->skey == pQuery->ekey); + + SCacheBlock *pBlock = NULL; + + qTrace("QInfo:%p get next data point, fileId:%d, slot:%d, pos:%d", GET_QINFO_ADDR(pQuery), pQuery->fileId, + pQuery->slot, pQuery->pos); + + // save the point that is directly after or equals to the specified point + getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pNextPoint, pQuery->pos); + + /* + * 1. for last_row query, return immediately. + * 2. the specified timestamp equals to the required key, interpolation according to neighbor points is not necessary + * for interp query. + */ + TSKEY actualKey = *(TSKEY *)pPointInterpSupporter->pNextPoint[0]; + if (isFirstLastRowQuery(pQuery) || actualKey == pQuery->skey) { + setQueryStatus(pQuery, QUERY_NOT_COMPLETED); + + /* + * the retrieved ts may not equals to pMeterObj->lastKey due to cache re-allocation + * set the pQuery->ekey/pQuery->skey/pQuery->lastKey to be the new value. 
+ */ + if (pQuery->ekey != actualKey) { + pQuery->skey = actualKey; + pQuery->ekey = actualKey; + pQuery->lastKey = actualKey; + pSupporter->rawSKey = actualKey; + pSupporter->rawEKey = actualKey; + } + return true; + } + + /* the qualified point is not the first point in data block */ + if (pQuery->pos > 0) { + int32_t prevPos = pQuery->pos - 1; + + /* save the point that is directly after the specified point */ + getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, prevPos); + } else { + __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; + +// savePointPosition(&pRuntimeEnv->startPos, pQuery->fileId, pQuery->slot, pQuery->pos); + + // backwards movement would not set the pQuery->pos correct. We need to set it manually later. + moveToNextBlock(pRuntimeEnv, QUERY_DESC_FORWARD_STEP, searchFn, true); + + /* + * no previous data exists. + * reset the status and load the data block that contains the qualified point + */ + if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK)) { + dTrace("QInfo:%p no previous data block, start fileId:%d, slot:%d, pos:%d, qrange:%" PRId64 "-%" PRId64 + ", out of range", + GET_QINFO_ADDR(pQuery), pRuntimeEnv->startPos.fileId, pRuntimeEnv->startPos.slot, + pRuntimeEnv->startPos.pos, pQuery->skey, pQuery->ekey); + + // no result, return immediately + setQueryStatus(pQuery, QUERY_COMPLETED); + return false; + } else { // prev has been located + if (pQuery->fileId >= 0) { + pQuery->pos = pQuery->pBlock[pQuery->slot].numOfPoints - 1; + getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, pQuery->pos); + + qTrace("QInfo:%p get prev data point, fileId:%d, slot:%d, pos:%d, pQuery->pos:%d", GET_QINFO_ADDR(pQuery), + pQuery->fileId, pQuery->slot, pQuery->pos, pQuery->pos); + } else { + // moveToNextBlock make sure there is a available cache block, if exists + assert(vnodeIsDatablockLoaded(pRuntimeEnv, pMeterObj, -1, true) == DISK_BLOCK_NO_NEED_TO_LOAD); + pBlock = 
&pRuntimeEnv->cacheBlock; + + pQuery->pos = pBlock->numOfPoints - 1; + getOneRowFromDataBlock(pRuntimeEnv, pPointInterpSupporter->pPrevPoint, pQuery->pos); + + qTrace("QInfo:%p get prev data point, fileId:%d, slot:%d, pos:%d, pQuery->pos:%d", GET_QINFO_ADDR(pQuery), + pQuery->fileId, pQuery->slot, pBlock->numOfPoints - 1, pQuery->pos); + } + } + } + + pQuery->skey = *(TSKEY *)pPointInterpSupporter->pPrevPoint[0]; + pQuery->ekey = *(TSKEY *)pPointInterpSupporter->pNextPoint[0]; + pQuery->lastKey = pQuery->skey; +#endif + return true; +} +
+/**
+ * Evaluate the column filters of the query against the row at elemPos.
+ *
+ * A row passes when, for every filtered column, its value is not NULL and at
+ * least one of that column's filters accepts it.
+ */
+bool vnodeDoFilterData(SQuery *pQuery, int32_t elemPos) {
+  for (int32_t col = 0; col < pQuery->numOfFilterCols; ++col) {
+    SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[col];
+    char *pValue = pColFilter->pData + pColFilter->info.info.bytes * elemPos;
+
+    if (isNull(pValue, pColFilter->info.info.type)) {
+      return false;
+    }
+
+    // look for the first filter of this column that accepts the value
+    int32_t total = pColFilter->numOfFilters;
+    int32_t idx = 0;
+    while (idx < total) {
+      SColumnFilterElem *pOne = &pColFilter->pFilters[idx];
+      if (pOne->fp(pOne, pValue, pValue)) {
+        break;
+      }
+      ++idx;
+    }
+
+    if (idx >= total) {  // none of the filters matched
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Count the row as read, then decide whether it belongs to the result:
+ * it must pass the column filters and must not be consumed by the pending
+ * limit offset (each qualified row skipped this way decrements the offset).
+ */
+bool vnodeFilterData(SQuery *pQuery, int32_t *numOfActualRead, int32_t index) {
+  *numOfActualRead += 1;
+
+  bool accepted = vnodeDoFilterData(pQuery, index);
+  if (accepted && pQuery->limit.offset > 0) {
+    pQuery->limit.offset -= 1;  // ignore this qualified row
+    accepted = false;
+  }
+
+  return accepted;
+}
+
+int64_t getNumOfResult(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = pRuntimeEnv->pQuery; + bool hasMainFunction = hasMainOutput(pQuery); + + int64_t maxOutput = 0; + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; + + /* + * ts, tag, tagprj function can not decide the output number of current query + * the number of output result is decided by main output + */ + if (hasMainFunction && + (functionId == 
TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ)) { + continue; + } + + SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[j]); + if (pResInfo != NULL && maxOutput < pResInfo->numOfRes) { + maxOutput = pResInfo->numOfRes; + } + } + + return maxOutput; +} +
+/** Map a group index to its result id: 200000 + groupIndex * 10000. */
+static int32_t getGroupResultId(int32_t groupIndex) {
+  const int32_t firstId = 200000;
+  const int32_t idsPerGroup = 10000;
+
+  return firstId + (groupIndex * idsPerGroup);
+}
+
+/**
+ * Tell whether the group-by clause contains a normal (non-tag) column.
+ * A NULL or empty group-by expression yields false.
+ */
+bool isGroupbyNormalCol(SSqlGroupbyExpr *pGroupbyExpr) {
+  if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) {
+    return false;
+  }
+
+  for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) {
+    SColIndexEx *pCol = &pGroupbyExpr->columnInfo[k];
+    if (pCol->flag != TSDB_COL_NORMAL) {
+      continue;
+    }
+
+    // make sure the normal column locates at the second position if tbname exists in group by clause
+    if (pGroupbyExpr->numOfGroupCols > 1) {
+      assert(pCol->colIdx > 0);
+    }
+
+    return true;
+  }
+
+  return false;
+}
+
+/**
+ * Data type of the first normal column in the group-by clause, looked up by
+ * column id in the query's column list. TSDB_DATA_TYPE_NULL is returned when
+ * there is no normal group-by column or the id is not in the column list.
+ */
+int16_t getGroupbyColumnType(SQuery *pQuery, SSqlGroupbyExpr *pGroupbyExpr) {
+  assert(pGroupbyExpr != NULL);
+
+  // -2 serves as the "no normal column found" marker for the lookup below
+  int32_t wantedColId = -2;
+  for (int32_t g = 0; g < pGroupbyExpr->numOfGroupCols; ++g) {
+    SColIndexEx *pCol = &pGroupbyExpr->columnInfo[g];
+    if (pCol->flag == TSDB_COL_NORMAL) {
+      wantedColId = pCol->colId;
+      break;
+    }
+  }
+
+  int16_t colType = TSDB_DATA_TYPE_NULL;
+  for (int32_t c = 0; c < pQuery->numOfCols; ++c) {
+    if (pQuery->colList[c].info.colId == wantedColId) {
+      colType = pQuery->colList[c].info.type;
+      break;
+    }
+  }
+
+  return colType;
+}
+
+bool isSelectivityWithTagsQuery(SQuery *pQuery) { + bool hasTags = false; + int32_t numOfSelectivity = 0; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functId = pQuery->pSelectExpr[i].pBase.functionId; + if (functId == TSDB_FUNC_TAG_DUMMY || functId == TSDB_FUNC_TS_DUMMY) { + hasTags = true; + continue; + } + + if ((aAggs[functId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) { + numOfSelectivity++; + } + } + + if (numOfSelectivity > 0 && 
hasTags) { + return true; + } + + return false; +} +
+/** True when the first output column is computed by TSDB_FUNC_TS_COMP. */
+bool isTSCompQuery(SQuery *pQuery) {
+  int32_t firstFunctionId = pQuery->pSelectExpr[0].pBase.functionId;
+  return firstFunctionId == TSDB_FUNC_TS_COMP;
+}
+
+/**
+ * Clip the rows of the current round so that the total stays within LIMIT.
+ *
+ * When a positive limit is set and the rows of this round plus the rows
+ * already accounted in pQInfo exceed it, the round is cut down to the
+ * remaining quota and the query is marked QUERY_COMPLETED.
+ *
+ * @return true when the result was clipped, false otherwise
+ */
+bool doRevisedResultsByLimit(SQInfo *pQInfo) {
+  SQuery *pQuery = pQInfo->runtimeEnv.pQuery;
+
+  if (pQuery->limit.limit <= 0) {
+    return false;
+  }
+
+  if (pQuery->rec.pointsRead + pQInfo->rec.pointsRead <= pQuery->limit.limit) {
+    return false;
+  }
+
+  pQuery->rec.pointsRead = pQuery->limit.limit - pQInfo->rec.pointsRead;
+
+  // query completed
+  setQueryStatus(pQuery, QUERY_COMPLETED);
+  return true;
+}
+
+/**
+ * Tell whether the scan of the current block has to stop.
+ *
+ * @param pQuery          query whose status flags are inspected
+ * @param pDataBlockInfo  not used by the check
+ * @param forwardStep     not used by the check
+ * @return true when the status has QUERY_RESBUF_FULL (output buffer is full)
+ *         or QUERY_COMPLETED set, false otherwise
+ */
+static bool queryPaused(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, int32_t forwardStep) {
+  bool bufferFull = Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL);
+  return bufferFull || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED);
+}
+
+/** True when any output column is a TOP or BOTTOM function. */
+static bool isTopBottomQuery(SQuery *pQuery) {
+  int32_t col = 0;
+
+  while (col < pQuery->numOfOutputCols) {
+    int32_t fid = pQuery->pSelectExpr[col].pBase.functionId;
+    if (fid != TSDB_FUNC_TS && (fid == TSDB_FUNC_TOP || fid == TSDB_FUNC_BOTTOM)) {
+      return true;
+    }
+    ++col;
+  }
+
+  return false;
+}
+
+static SDataStatis *getStatisInfo(SQuery *pQuery, SDataStatis *pStatis, SDataBlockInfo *pDataBlockInfo, + int32_t columnIndex) { + // no SField info exist, or column index larger than the output column, no result. 
+ if (pStatis == NULL) { + return NULL; + } + + // for a tag column, no corresponding field info + SColIndexEx *pColIndexEx = &pQuery->pSelectExpr[columnIndex].pBase.colInfo; + if (TSDB_COL_IS_TAG(pColIndexEx->flag)) { + return NULL; + } + + /* + * Choose the right column field info by field id, since the file block may be out of date, + * which means the newest table schema is not equalled to the schema of this block. + */ + for (int32_t i = 0; i < pDataBlockInfo->numOfCols; ++i) { + if (pColIndexEx->colId == pStatis[i].colId) { + return &pStatis[i]; + } + } + + return NULL; +} +
+/**
+ * Decide whether the data of output column `col` may contain NULL values in
+ * the current block.
+ *
+ * Tag columns and blocks without statistics are reported as NULL-free.
+ * Otherwise *pColStatis receives the statistics entry of the column (or NULL
+ * when none matches) and the column is NULL-free only when that entry exists
+ * and records zero NULLs.
+ */
+static bool hasNullValue(SQuery *pQuery, int32_t col, SDataBlockInfo *pDataBlockInfo, SDataStatis *pStatis,
+                         SDataStatis **pColStatis) {
+  if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[col].pBase.colInfo.flag) || pStatis == NULL) {
+    return false;
+  }
+
+  *pColStatis = getStatisInfo(pQuery, pStatis, pDataBlockInfo, col);
+  if ((*pColStatis) != NULL && (*pColStatis)->numOfNull == 0) {
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * Find the window result registered under the given key, or register a new
+ * one, and make it the current window (pWindowResInfo->curIndex).
+ *
+ * When all slots are used, the result array is doubled and the new slots are
+ * zeroed and initialized before the new key is inserted.
+ *
+ * @param pData  key bytes used for the hash lookup
+ * @param bytes  length of the key
+ * @return the window result for the key, or NULL when the result array could
+ *         not be enlarged; in that case pWindowResInfo is left untouched
+ */
+static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, char *pData,
+                                             int16_t bytes) {
+  SQuery *pQuery = pRuntimeEnv->pQuery;
+
+  int32_t *p1 = (int32_t *)taosHashGet(pWindowResInfo->hashList, pData, bytes);
+  if (p1 != NULL) {
+    pWindowResInfo->curIndex = *p1;
+  } else {  // more than the capacity, reallocate the resources
+    if (pWindowResInfo->size >= pWindowResInfo->capacity) {
+      int64_t newCap = pWindowResInfo->capacity * 2;
+
+      char *t = realloc(pWindowResInfo->pResult, newCap * sizeof(SWindowResult));
+      if (t == NULL) {
+        // the old array is still valid, but there is no room for another window;
+        // going on would initialize slots beyond the end of the old array
+        return NULL;
+      }
+
+      pWindowResInfo->pResult = (SWindowResult *)t;
+      memset(&pWindowResInfo->pResult[pWindowResInfo->capacity], 0, sizeof(SWindowResult) * pWindowResInfo->capacity);
+
+      for (int32_t i = pWindowResInfo->capacity; i < newCap; ++i) {
+        SPosInfo pos = {-1, -1};
+        createQueryResultInfo(pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &pos);
+      }
+
+      pWindowResInfo->capacity = newCap;
+    }
+
+    // add a new result set for a new group
+    pWindowResInfo->curIndex = pWindowResInfo->size++;
+    taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t));
+  }
+
+  return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex);
+}
+
+// get the correct time window according to the handled timestamp +static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t ts, SQuery *pQuery) { + STimeWindow w = {0}; + + if (pWindowResInfo->curIndex == -1) { // the first window, from the previous stored value + w.skey = pWindowResInfo->prevSKey; + w.ekey = w.skey + pQuery->intervalTime - 1; + } else { + int32_t slot = curTimeWindow(pWindowResInfo); + w = getWindowResult(pWindowResInfo, slot)->window; + } + + if (w.skey > ts || w.ekey < ts) { + int64_t st = w.skey; + + if (st > ts) { + st -= ((st - ts + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime; + } + + int64_t et = st + pQuery->intervalTime - 1; + if (et < ts) { + st += ((ts - et + pQuery->slidingTime - 1) / pQuery->slidingTime) * pQuery->slidingTime; + } + + w.skey = st; + w.ekey = w.skey + pQuery->intervalTime - 1; + } + + /* + * query border check, skey should not be bounded by the query time range, since the value skey will + * be used as the time window index value. So we only change ekey of time window accordingly. 
+ */ + if (w.ekey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) { + w.ekey = pQuery->window.ekey; + } + + assert(ts >= w.skey && ts <= w.ekey && w.skey != 0); + + return w; +} +
+/**
+ * Give a window result a row in the disk-based result buffer, unless it
+ * already has one.
+ *
+ * The row is taken from the last page of group `sid`; a new page is requested
+ * when the group has no page yet or the last page already holds
+ * numOfRowsPerPage rows.
+ *
+ * @return 0 on success (or when a row was assigned before), -1 when no page
+ *         could be obtained
+ */
+static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid,
+                                     int32_t numOfRowsPerPage) {
+  if (pWindowRes->pos.pageId != -1) {
+    return 0;
+  }
+
+  int32_t    pageId = -1;
+  tFilePage *pPage = NULL;
+
+  // in the first scan, new space needed for results
+  SIDList pages = getDataBufPagesIdList(pResultBuf, sid);
+  if (pages.size == 0) {
+    pPage = getNewDataBuf(pResultBuf, sid, &pageId);
+  } else {
+    pageId = getLastPageId(&pages);
+    pPage = getResultBufferPageById(pResultBuf, pageId);
+
+    bool lastPageFull = (pPage->numOfElems >= numOfRowsPerPage);
+    if (lastPageFull) {
+      pPage = getNewDataBuf(pResultBuf, sid, &pageId);
+
+      // number of elements must be 0 for new allocated buffer
+      assert(pPage == NULL || pPage->numOfElems == 0);
+    }
+  }
+
+  if (pPage == NULL) {
+    return -1;
+  }
+
+  // set the number of rows in current disk page
+  if (pWindowRes->pos.pageId == -1) {  // not allocated yet, allocate new buffer
+    pWindowRes->pos.pageId = pageId;
+    pWindowRes->pos.rowId = pPage->numOfElems++;
+  }
+
+  return 0;
+}
+
+static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid, + STimeWindow *win) { + assert(win->skey <= win->ekey); + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + + SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE); + if (pWindowRes == NULL) { + return -1; + } + + // not assign result buffer yet, add new result buffer + if (pWindowRes->pos.pageId == -1) { + int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, sid, pRuntimeEnv->numOfRowsPerPage); + if (ret != 0) { + return -1; + } + } + + // set time window for current result + pWindowRes->window = *win; + + setWindowResOutputBuf(pRuntimeEnv, pWindowRes); + 
initCtxOutputBuf(pRuntimeEnv); + + return TSDB_CODE_SUCCESS; +} +
+/** Status record of the window result in `slot`; the slot must be in [0, size). */
+static SWindowStatus *getTimeWindowResStatus(SWindowResInfo *pWindowResInfo, int32_t slot) {
+  assert(slot >= 0 && slot < pWindowResInfo->size);
+
+  SWindowResult *pRes = &pWindowResInfo->pResult[slot];
+  return &pRes->status;
+}
+
+/**
+ * Number of rows to consume in a block, starting at `pos`, until `ekey` is
+ * reached in the given scan order.
+ *
+ * The end position comes from searchFn; a negative result means zero steps.
+ * The row at the end position is counted only when its timestamp equals ekey.
+ */
+static int32_t getForwardStepsInBlock(int32_t numOfPoints, __block_search_fn_t searchFn, TSKEY ekey, int16_t pos,
+                                      int16_t order, int64_t *pData) {
+  int32_t endPos = searchFn((char *)pData, numOfPoints, ekey, order);
+  if (endPos < 0) {
+    return 0;
+  }
+
+  int32_t steps;
+  if (order == TSQL_SO_ASC) {
+    steps = endPos - pos;
+  } else {
+    steps = pos - endPos;
+  }
+  assert(steps >= 0);
+
+  // endPos data is equalled to the key so, we do need to read the element in endPos
+  if (pData[endPos] == ekey) {
+    steps += 1;
+  }
+
+  return steps;
+}
+
+/** + * NOTE: the query status only set for the first scan of master scan. + */ +static void doCheckQueryCompleted(SQueryRuntimeEnv *pRuntimeEnv, TSKEY lastKey, SWindowResInfo *pWindowResInfo) { + SQuery *pQuery = pRuntimeEnv->pQuery; + if (pRuntimeEnv->scanFlag != MASTER_SCAN || (!isIntervalQuery(pQuery))) { + return; + } + + // no qualified results exist, abort check + if (pWindowResInfo->size == 0) { + return; + } + + // query completed + if ((lastKey >= pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) || + (lastKey <= pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) { + closeAllTimeWindow(pWindowResInfo); + + pWindowResInfo->curIndex = pWindowResInfo->size - 1; + setQueryStatus(pQuery, QUERY_COMPLETED | QUERY_RESBUF_FULL); + } else { // set the current index to be the last unclosed window + int32_t i = 0; + int64_t skey = 0; + + for (i = 0; i < pWindowResInfo->size; ++i) { + SWindowResult *pResult = &pWindowResInfo->pResult[i]; + if (pResult->status.closed) { + continue; + } + + if ((pResult->window.ekey <= lastKey && QUERY_IS_ASC_QUERY(pQuery)) || + (pResult->window.skey >= lastKey && !QUERY_IS_ASC_QUERY(pQuery))) { + closeTimeWindow(pWindowResInfo, i); + } 
else { + skey = pResult->window.skey; + break; + } + } + + // all windows are closed, set the last one to be the skey + if (skey == 0) { + assert(i == pWindowResInfo->size); + pWindowResInfo->curIndex = pWindowResInfo->size - 1; + } else { + pWindowResInfo->curIndex = i; + } + + pWindowResInfo->prevSKey = pWindowResInfo->pResult[pWindowResInfo->curIndex].window.skey; + + // the number of completed slots are larger than the threshold, dump to client immediately. + int32_t n = numOfClosedTimeWindow(pWindowResInfo); + if (n > pWindowResInfo->threshold) { + setQueryStatus(pQuery, QUERY_RESBUF_FULL); + } + + qTrace("QInfo:%p total window:%d, closed:%d", GET_QINFO_ADDR(pQuery), pWindowResInfo->size, n); + } + + assert(pWindowResInfo->prevSKey != 0); +} + +static int32_t getNumOfRowsInTimeWindow(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo, TSKEY *pPrimaryColumn, + int32_t startPos, TSKEY ekey, __block_search_fn_t searchFn, + bool updateLastKey) { + assert(startPos >= 0 && startPos < pDataBlockInfo->size); + + int32_t num = -1; + int32_t order = pQuery->order.order; + + int32_t step = GET_FORWARD_DIRECTION_FACTOR(order); + + if (QUERY_IS_ASC_QUERY(pQuery)) { + if (ekey < pDataBlockInfo->window.ekey) { + num = getForwardStepsInBlock(pDataBlockInfo->size, searchFn, ekey, startPos, order, pPrimaryColumn); + if (num == 0) { // no qualified data in current block, do not update the lastKey value + assert(ekey < pPrimaryColumn[startPos]); + } else { + if (updateLastKey) { + pQuery->lastKey = pPrimaryColumn[startPos + (num - 1)] + step; + } + } + } else { + num = pDataBlockInfo->size - startPos; + if (updateLastKey) { + pQuery->lastKey = pDataBlockInfo->window.ekey + step; + } + } + } else { // desc + if (ekey > pDataBlockInfo->window.skey) { + num = getForwardStepsInBlock(pDataBlockInfo->size, searchFn, ekey, startPos, order, pPrimaryColumn); + if (num == 0) { // no qualified data in current block, do not update the lastKey value + assert(ekey > pPrimaryColumn[startPos]); + } 
else { + if (updateLastKey) { + pQuery->lastKey = pPrimaryColumn[startPos - (num - 1)] + step; + } + } + } else { + num = startPos + 1; + if (updateLastKey) { + pQuery->lastKey = pDataBlockInfo->window.skey + step; + } + } + } + + assert(num >= 0); + return num; +} +
+/**
+ * Run the block-wise implementation of every output function over
+ * `forwardStep` rows of the current block for one time window.
+ *
+ * Nothing is done unless this is the master scan or the window is closed.
+ * The start offset is always the lowest row index of the range, regardless of
+ * the scan order.
+ */
+static void doBlockwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
+                                      int32_t startPos, int32_t forwardStep, TSKEY *tsBuf) {
+  SQuery *        pQuery = pRuntimeEnv->pQuery;
+  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
+
+  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
+    return;
+  }
+
+  int32_t lowestPos = startPos;
+  if (!QUERY_IS_ASC_QUERY(pQuery)) {
+    lowestPos = startPos - (forwardStep - 1);
+  }
+
+  for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) {
+    SQLFunctionCtx *pOne = &pCtx[col];
+    int32_t         functionId = pQuery->pSelectExpr[col].pBase.functionId;
+
+    pOne->nStartQueryTimestamp = pWin->skey;
+    pOne->size = forwardStep;
+    pOne->startOffset = lowestPos;
+
+    // selectivity functions also need the timestamps of the rows they inspect
+    if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) {
+      pOne->ptsList = &tsBuf[pOne->startOffset];
+    }
+
+    if (functionNeedToExecute(pRuntimeEnv, pOne, functionId)) {
+      aAggs[functionId].xFunction(pOne);
+    }
+  }
+}
+
+/**
+ * Run the row-wise implementation of every output function on the single row
+ * at `offset` for one time window.
+ *
+ * Nothing is done unless this is the master scan or the window is closed.
+ */
+static void doRowwiseApplyFunctions(SQueryRuntimeEnv *pRuntimeEnv, SWindowStatus *pStatus, STimeWindow *pWin,
+                                    int32_t offset) {
+  SQuery *        pQuery = pRuntimeEnv->pQuery;
+  SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx;
+
+  if (!IS_MASTER_SCAN(pRuntimeEnv) && !pStatus->closed) {
+    return;
+  }
+
+  for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) {
+    SQLFunctionCtx *pOne = &pCtx[col];
+    pOne->nStartQueryTimestamp = pWin->skey;
+
+    int32_t functionId = pQuery->pSelectExpr[col].pBase.functionId;
+    if (functionNeedToExecute(pRuntimeEnv, pOne, functionId)) {
+      aAggs[functionId].xFunctionF(pOne, offset);
+    }
+  }
+}
+
+static int32_t getNextQualifiedWindow(SQueryRuntimeEnv *pRuntimeEnv, STimeWindow *pNextWin, + SWindowResInfo *pWindowResInfo, SDataBlockInfo *pDataBlockInfo, + TSKEY *primaryKeys, __block_search_fn_t searchFn) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + while (1) { + if ((pNextWin->ekey > 
pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) || + (pNextWin->skey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) { + return -1; + } + + getNextTimeWindow(pQuery, pNextWin); + + // next time window is not in current block + if ((pNextWin->skey > pDataBlockInfo->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) || + (pNextWin->ekey < pDataBlockInfo->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) { + return -1; + } + + TSKEY startKey = -1; + if (QUERY_IS_ASC_QUERY(pQuery)) { + startKey = pNextWin->skey; + if (startKey < pQuery->window.skey) { + startKey = pQuery->window.skey; + } + } else { + startKey = pNextWin->ekey; + if (startKey > pQuery->window.skey) { + startKey = pQuery->window.skey; + } + } + + int32_t startPos = searchFn((char *)primaryKeys, pDataBlockInfo->size, startKey, pQuery->order.order); + + /* + * This time window does not cover any data, try next time window, + * this case may happen when the time window is too small + */ + if ((primaryKeys[startPos] > pNextWin->ekey && QUERY_IS_ASC_QUERY(pQuery)) || + (primaryKeys[startPos] < pNextWin->skey && !QUERY_IS_ASC_QUERY(pQuery))) { + continue; + } + + return startPos; + } +} +
+/**
+ * End key of a time window in scan direction, clamped to the end of the query
+ * range (pQuery->window.ekey).
+ *
+ * Ascending scans end at the window's ekey but never beyond the query end;
+ * descending scans end at the window's skey but never before the query end.
+ */
+static TSKEY reviseWindowEkey(SQuery *pQuery, STimeWindow *pWindow) {
+  TSKEY queryEnd = pQuery->window.ekey;
+
+  if (QUERY_IS_ASC_QUERY(pQuery)) {
+    return (pWindow->ekey > queryEnd) ? queryEnd : pWindow->ekey;
+  }
+
+  return (pWindow->skey < queryEnd) ? queryEnd : pWindow->skey;
+}
+
+char *getDataBlocks(SQueryRuntimeEnv *pRuntimeEnv, SArithmeticSupport *sas, int32_t col, int32_t size, + SArray *pDataBlock) { + SQuery * pQuery = pRuntimeEnv->pQuery; + SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; + + char *dataBlock = NULL; + + int32_t functionId = pQuery->pSelectExpr[col].pBase.functionId; + + if (functionId == TSDB_FUNC_ARITHM) { + sas->pExpr = &pQuery->pSelectExpr[col]; + + // set the start offset to be the lowest start position, no matter asc/desc query order + 
if (QUERY_IS_ASC_QUERY(pQuery)) { + pCtx->startOffset = pQuery->pos; + } else { + pCtx->startOffset = pQuery->pos - (size - 1); + } + + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { + SColumnInfo *pColMsg = &pQuery->colList[i].info; + assert(0); + // char * pData = doGetDataBlocks(pQuery, pRuntimeEnv->colDataBuffer, pQuery->colList[i].colIdxInBuf); + + sas->elemSize[i] = pColMsg->bytes; + // sas->data[i] = pData + pCtx->startOffset * sas->elemSize[i]; // start from the offset + } + + sas->numOfCols = pQuery->numOfCols; + sas->offset = 0; + } else { // other type of query function + SColIndexEx *pCol = &pQuery->pSelectExpr[col].pBase.colInfo; + if (TSDB_COL_IS_TAG(pCol->flag)) { + dataBlock = NULL; + } else { + /* + * the colIdx is acquired from the first meter of all qualified meters in this vnode during query prepare stage, + * the remain meter may not have the required column in cache actually. + * So, the validation of required column in cache with the corresponding meter schema is reinforced. + */ + + if (pDataBlock == NULL) { + return NULL; + } + + int32_t numOfCols = taosArrayGetSize(pDataBlock); + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoEx *p = taosArrayGet(pDataBlock, i); + if (pCol->colId == p->info.colId) { + dataBlock = p->pData; + break; + } + } + } + } + + return dataBlock; +} + +/** + * + * @param pRuntimeEnv + * @param forwardStep + * @param primaryKeyCol + * @param pFields + * @param isDiskFileBlock + * @return the incremental number of output value, so it maybe 0 for fixed number of query, + * such as count/min/max etc. 
+ */ +static int32_t blockwiseApplyAllFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, + SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo, + __block_search_fn_t searchFn, SArray *pDataBlock) { + SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; + SQuery * pQuery = pRuntimeEnv->pQuery; + + SColumnInfoEx *pColInfo = NULL; + TSKEY * primaryKeyCol = NULL; + + if (pDataBlock != NULL) { + pColInfo = taosArrayGet(pDataBlock, 0); + primaryKeyCol = (TSKEY *)(pColInfo->pData); + } + + pQuery->pos = QUERY_IS_ASC_QUERY(pQuery) ? 0 : pDataBlockInfo->size - 1; + int64_t prevNumOfRes = getNumOfResult(pRuntimeEnv); + + SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutputCols, sizeof(SArithmeticSupport)); + + for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { + int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; + + SDataStatis *tpField = NULL; + bool hasNull = hasNullValue(pQuery, k, pDataBlockInfo, pStatis, &tpField); + char * dataBlock = getDataBlocks(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->size, pDataBlock); + + setExecParams(pQuery, &pCtx[k], dataBlock, (char *)primaryKeyCol, pDataBlockInfo->size, functionId, tpField, + hasNull, &sasArray[k], pRuntimeEnv->scanFlag); + } + + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); + if (isIntervalQuery(pQuery)) { + int32_t offset = GET_COL_DATA_POS(pQuery, 0, step); + TSKEY ts = primaryKeyCol[offset]; + + STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery); + assert(0); + // if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pRuntimeEnv->pTabObj->sid, &win) != + // TSDB_CODE_SUCCESS) { + // return 0; + // } + + TSKEY ekey = reviseWindowEkey(pQuery, &win); + int32_t forwardStep = + getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, primaryKeyCol, pQuery->pos, ekey, searchFn, true); + + SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); + doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &win, pQuery->pos, 
forwardStep, primaryKeyCol); + + int32_t index = pWindowResInfo->curIndex; + STimeWindow nextWin = win; + + while (1) { + int32_t startPos = + getNextQualifiedWindow(pRuntimeEnv, &nextWin, pWindowResInfo, pDataBlockInfo, primaryKeyCol, searchFn); + if (startPos < 0) { + break; + } + + // null data, failed to allocate more memory buffer + // int32_t sid = pRuntimeEnv->pTabObj->sid; + int32_t sid = 0; + assert(0); + if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, sid, &nextWin) != TSDB_CODE_SUCCESS) { + break; + } + + ekey = reviseWindowEkey(pQuery, &nextWin); + forwardStep = getNumOfRowsInTimeWindow(pQuery, pDataBlockInfo, primaryKeyCol, startPos, ekey, searchFn, true); + + pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); + + doBlockwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, startPos, forwardStep, primaryKeyCol); + } + + pWindowResInfo->curIndex = index; + } else { + /* + * the sqlfunctionCtx parameters should be set done before all functions are invoked, + * since the selectivity + tag_prj query needs all parameters been set done. + * tag_prj function are changed to be TSDB_FUNC_TAG_DUMMY + */ + for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { + int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; + if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) { + aAggs[functionId].xFunction(&pCtx[k]); + } + } + } + + /* + * No need to calculate the number of output results for group-by normal columns, interval query + * because the results of group by normal column is put into intermediate buffer. 
+ */ + int32_t num = 0; + if (!isIntervalQuery(pQuery)) { + num = getNumOfResult(pRuntimeEnv) - prevNumOfRes; + } + + tfree(sasArray); + return (int32_t)num; +} + +static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pData, int16_t type, int16_t bytes) { + if (isNull(pData, type)) { // ignore the null value + return -1; + } + + int32_t GROUPRESULTID = 1; + + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + + SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes); + if (pWindowRes == NULL) { + return -1; + } + + // not assign result buffer yet, add new result buffer + if (pWindowRes->pos.pageId == -1) { + int32_t ret = addNewWindowResultBuf(pWindowRes, pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage); + if (ret != 0) { + return -1; + } + } + + setWindowResOutputBuf(pRuntimeEnv, pWindowRes); + initCtxOutputBuf(pRuntimeEnv); + return TSDB_CODE_SUCCESS; +} + +static char *getGroupbyColumnData(SQuery *pQuery, SData **data, int16_t *type, int16_t *bytes) { + char *groupbyColumnData = NULL; + + SSqlGroupbyExpr *pGroupbyExpr = pQuery->pGroupbyExpr; + + for (int32_t k = 0; k < pGroupbyExpr->numOfGroupCols; ++k) { + if (pGroupbyExpr->columnInfo[k].flag == TSDB_COL_TAG) { + continue; + } + + int16_t colIndex = -1; + int32_t colId = pGroupbyExpr->columnInfo[k].colId; + + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { + if (pQuery->colList[i].info.colId == colId) { + colIndex = i; + break; + } + } + + assert(colIndex >= 0 && colIndex < pQuery->numOfCols); + + *type = pQuery->colList[colIndex].info.type; + *bytes = pQuery->colList[colIndex].info.bytes; + + // groupbyColumnData = doGetDataBlocks(pQuery, data, pQuery->colList[colIndex].inf); + break; + } + + return groupbyColumnData; +} + +static int32_t doTSJoinFilter(SQueryRuntimeEnv *pRuntimeEnv, int32_t offset) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + STSElem elem = tsBufGetElem(pRuntimeEnv->pTSBuf); + SQLFunctionCtx *pCtx = 
pRuntimeEnv->pCtx; + + // compare tag first + if (pCtx[0].tag.i64Key != elem.tag) { + return TS_JOIN_TAG_NOT_EQUALS; + } + + TSKEY key = *(TSKEY *)(pCtx[0].aInputElemBuf + TSDB_KEYSIZE * offset); + +#if defined(_DEBUG_VIEW) + printf("elem in comp ts file:%" PRId64 ", key:%" PRId64 + ", tag:%d, id:%s, query order:%d, ts order:%d, traverse:%d, index:%d\n", + elem.ts, key, elem.tag, pRuntimeEnv->pTabObj->meterId, pQuery->order.order, pRuntimeEnv->pTSBuf->tsOrder, + pRuntimeEnv->pTSBuf->cur.order, pRuntimeEnv->pTSBuf->cur.tsIndex); +#endif + + if (QUERY_IS_ASC_QUERY(pQuery)) { + if (key < elem.ts) { + return TS_JOIN_TS_NOT_EQUALS; + } else if (key > elem.ts) { + assert(false); + } + } else { + if (key > elem.ts) { + return TS_JOIN_TS_NOT_EQUALS; + } else if (key < elem.ts) { + assert(false); + } + } + + return TS_JOIN_TS_EQUAL; +} + +static bool functionNeedToExecute(SQueryRuntimeEnv *pRuntimeEnv, SQLFunctionCtx *pCtx, int32_t functionId) { + SResultInfo *pResInfo = GET_RES_INFO(pCtx); + + if (pResInfo->complete || functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TS_DUMMY) { + return false; + } + + // in the supplementary scan, only the following functions need to be executed + if (IS_SUPPLEMENT_SCAN(pRuntimeEnv) && + !(functionId == TSDB_FUNC_LAST_DST || functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_FIRST || + functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TS)) { + return false; + } + + return true; +} + +static int32_t rowwiseApplyAllFunctions(SQueryRuntimeEnv *pRuntimeEnv, SDataStatis *pStatis, + SDataBlockInfo *pDataBlockInfo, SWindowResInfo *pWindowResInfo, + SArray *pDataBlock) { + SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; + SQuery * pQuery = pRuntimeEnv->pQuery; + TSKEY * primaryKeyCol = (TSKEY *)taosArrayGet(pDataBlock, 0); + + // SData **data = pRuntimeEnv->colDataBuffer; + + int64_t prevNumOfRes = 0; + bool groupbyStateValue = isGroupbyNormalCol(pQuery->pGroupbyExpr); + + if 
(!groupbyStateValue) { + prevNumOfRes = getNumOfResult(pRuntimeEnv); + } + + SArithmeticSupport *sasArray = calloc((size_t)pQuery->numOfOutputCols, sizeof(SArithmeticSupport)); + + int16_t type = 0; + int16_t bytes = 0; + + char *groupbyColumnData = NULL; + if (groupbyStateValue) { + assert(0); + // groupbyColumnData = getGroupbyColumnData(pQuery, data, &type, &bytes); + } + + for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { + int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; + + SDataStatis *pColStatis = NULL; + + bool hasNull = hasNullValue(pQuery, k, pDataBlockInfo, pStatis, &pColStatis); + char *dataBlock = getDataBlocks(pRuntimeEnv, &sasArray[k], k, pDataBlockInfo->size, pDataBlock); + + setExecParams(pQuery, &pCtx[k], dataBlock, (char *)primaryKeyCol, pDataBlockInfo->size, functionId, pColStatis, + hasNull, &sasArray[k], pRuntimeEnv->scanFlag); + } + + // set the input column data + for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { + SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k]; + assert(0); + /* + * NOTE: here the tbname/tags column cannot reach here, since it will never be a filter column, + * so we do NOT check if is a tag or not + */ + // pFilterInfo->pData = doGetDataBlocks(pQuery, data, pFilterInfo->info.colIdxInBuf); + } + + int32_t numOfRes = 0; + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); + + // from top to bottom in desc + // from bottom to top in asc order + if (pRuntimeEnv->pTSBuf != NULL) { + SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); + qTrace("QInfo:%p process data rows, numOfRows:%d, query order:%d, ts comp order:%d", pQInfo, pDataBlockInfo->size, + pQuery->order.order, pRuntimeEnv->pTSBuf->cur.order); + } + + int32_t j = 0; + TSKEY lastKey = -1; + + for (j = 0; j < pDataBlockInfo->size; ++j) { + int32_t offset = GET_COL_DATA_POS(pQuery, j, step); + + if (pRuntimeEnv->pTSBuf != NULL) { + int32_t r = doTSJoinFilter(pRuntimeEnv, offset); + if (r == TS_JOIN_TAG_NOT_EQUALS) { 
+ break; + } else if (r == TS_JOIN_TS_NOT_EQUALS) { + continue; + } else { + assert(r == TS_JOIN_TS_EQUAL); + } + } + + if (pQuery->numOfFilterCols > 0 && (!vnodeDoFilterData(pQuery, offset))) { + continue; + } + + // interval window query + if (isIntervalQuery(pQuery)) { + // decide the time window according to the primary timestamp + int64_t ts = primaryKeyCol[offset]; + STimeWindow win = getActiveTimeWindow(pWindowResInfo, ts, pQuery); + + assert(0); + int32_t ret = 0; + // int32_t ret = setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, pRuntimeEnv->pTabObj->sid, &win); + if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code + continue; + } + + // all startOffset are identical + offset -= pCtx[0].startOffset; + + SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); + doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &win, offset); + + lastKey = ts; + STimeWindow nextWin = win; + int32_t index = pWindowResInfo->curIndex; + assert(0); + int32_t sid = 0; // pRuntimeEnv->pTabObj->sid; + + while (1) { + getNextTimeWindow(pQuery, &nextWin); + if (pWindowResInfo->startTime > nextWin.skey || + (nextWin.skey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) || + (nextWin.skey > pQuery->window.skey && !QUERY_IS_ASC_QUERY(pQuery))) { + break; + } + + if (ts < nextWin.skey || ts > nextWin.ekey) { + break; + } + + // null data, failed to allocate more memory buffer + if (setWindowOutputBufByKey(pRuntimeEnv, pWindowResInfo, sid, &nextWin) != TSDB_CODE_SUCCESS) { + break; + } + + pStatus = getTimeWindowResStatus(pWindowResInfo, curTimeWindow(pWindowResInfo)); + doRowwiseApplyFunctions(pRuntimeEnv, pStatus, &nextWin, offset); + } + + pWindowResInfo->curIndex = index; + } else { // other queries + // decide which group this rows belongs to according to current state value + if (groupbyStateValue) { + char *stateVal = groupbyColumnData + bytes * offset; + + int32_t ret = setGroupResultOutputBuf(pRuntimeEnv, stateVal, type, 
bytes); + if (ret != TSDB_CODE_SUCCESS) { // null data, too many state code + continue; + } + } + + // update the lastKey + lastKey = primaryKeyCol[offset]; + + // all startOffset are identical + offset -= pCtx[0].startOffset; + + for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { + int32_t functionId = pQuery->pSelectExpr[k].pBase.functionId; + if (functionNeedToExecute(pRuntimeEnv, &pCtx[k], functionId)) { + aAggs[functionId].xFunctionF(&pCtx[k], offset); + } + } + } + + if (pRuntimeEnv->pTSBuf != NULL) { + // if timestamp filter list is empty, quit current query + if (!tsBufNextPos(pRuntimeEnv->pTSBuf)) { + setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); + break; + } + } + + /* + * pointsOffset is the maximum available space in result buffer update the actual forward step for query that + * requires checking buffer during loop + */ + if ((pQuery->checkBufferInLoop == 1) && (++numOfRes) >= pQuery->pointsOffset) { + pQuery->lastKey = lastKey + step; + assert(0); + // *forwardStep = j + 1; + break; + } + } + + free(sasArray); + + /* + * No need to calculate the number of output results for group-by normal columns, interval query + * because the results of group by normal column is put into intermediate buffer. + */ + int32_t num = 0; + if (!groupbyStateValue && !isIntervalQuery(pQuery)) { + num = getNumOfResult(pRuntimeEnv) - prevNumOfRes; + } + + return num; +} + +static int32_t reviseForwardSteps(SQueryRuntimeEnv *pRuntimeEnv, int32_t forwardStep) { + /* + * 1. If value filter exists, we try all data in current block, and do not set the QUERY_RESBUF_FULL flag. + * + * 2. In case of top/bottom/ts_comp query, the checkBufferInLoop == 1 and pQuery->numOfFilterCols + * may be 0 or not. We do not check the capacity of output buffer, since the filter function will do it. + * + * 3. In handling the query of secondary query of join, tsBuf servers as a ts filter. 
+ */ + SQuery *pQuery = pRuntimeEnv->pQuery; + + if (isTopBottomQuery(pQuery) || isTSCompQuery(pQuery) || pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) { + return forwardStep; + } + + // current buffer does not have enough space, try in the next loop + if ((pQuery->checkBufferInLoop == 1) && (pQuery->pointsOffset <= forwardStep)) { + forwardStep = pQuery->pointsOffset; + } + + return forwardStep; +} + +static int32_t tableApplyFunctionsOnBlock(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pDataBlockInfo, + SDataStatis *pStatis, __block_search_fn_t searchFn, int32_t *numOfRes, + SWindowResInfo *pWindowResInfo, SArray *pDataBlock) { + SQuery *pQuery = pRuntimeEnv->pQuery; + int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); + + if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { + *numOfRes = rowwiseApplyAllFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, pDataBlock); + } else { + *numOfRes = blockwiseApplyAllFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock); + } + + TSKEY lastKey = QUERY_IS_ASC_QUERY(pQuery) ? 
pDataBlockInfo->window.ekey : pDataBlockInfo->window.skey; + pQuery->lastKey = lastKey + step; + + doCheckQueryCompleted(pRuntimeEnv, lastKey, pWindowResInfo); + + // interval query with limit applied + if (isIntervalQuery(pQuery) && pQuery->limit.limit > 0 && + (pQuery->limit.limit + pQuery->limit.offset) <= numOfClosedTimeWindow(pWindowResInfo) && + pRuntimeEnv->scanFlag == MASTER_SCAN) { + setQueryStatus(pQuery, QUERY_COMPLETED); + } + + assert(*numOfRes >= 0); + + // check if buffer is large enough for accommodating all qualified points + if (*numOfRes > 0 && pQuery->checkBufferInLoop == 1) { + pQuery->pointsOffset -= *numOfRes; + if (pQuery->pointsOffset <= 0) { // todo return correct numOfRes for ts_comp function + pQuery->pointsOffset = 0; + setQueryStatus(pQuery, QUERY_RESBUF_FULL); + } + } + + return 0; +} + +void setExecParams(SQuery *pQuery, SQLFunctionCtx *pCtx, void *inputData, char *primaryColumnData, int32_t size, + int32_t functionId, SDataStatis *pStatis, bool hasNull, void *param, int32_t scanFlag) { + pCtx->scanFlag = scanFlag; + + pCtx->aInputElemBuf = inputData; + pCtx->hasNull = hasNull; + + if (pStatis != NULL) { + pCtx->preAggVals.isSet = true; + pCtx->preAggVals.size = size; + pCtx->preAggVals.statis = *pStatis; + } else { + pCtx->preAggVals.isSet = false; + } + + if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0 && (primaryColumnData != NULL)) { + pCtx->ptsList = (int64_t *)(primaryColumnData); + } + + if (functionId >= TSDB_FUNC_FIRST_DST && functionId <= TSDB_FUNC_LAST_DST) { + // last_dist or first_dist function + // store the first&last timestamp into the intermediate buffer [1], the true + // value may be null but timestamp will never be null + pCtx->ptsList = (int64_t *)(primaryColumnData); + } else if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_TWA || + functionId == TSDB_FUNC_DIFF || (functionId >= TSDB_FUNC_RATE && functionId <= TSDB_FUNC_AVG_IRATE)) { + /* + * 
leastsquares function needs two columns of input, currently, the x value of linear equation is set to + * timestamp column, and the y-value is the column specified in pQuery->pSelectExpr[i].colIdxInBuffer + * + * top/bottom function needs timestamp to indicate when the + * top/bottom values emerge, so does diff function + */ + if (functionId == TSDB_FUNC_TWA) { + STwaInfo *pTWAInfo = GET_RES_INFO(pCtx)->interResultBuf; + pTWAInfo->SKey = pQuery->window.skey; + pTWAInfo->EKey = pQuery->window.ekey; + } + + pCtx->ptsList = (int64_t *)(primaryColumnData); + + } else if (functionId == TSDB_FUNC_ARITHM) { + pCtx->param[1].pz = param; + } + + pCtx->startOffset = 0; + pCtx->size = size; + +#if defined(_DEBUG_VIEW) + // int64_t *tsList = (int64_t *)primaryColumnData; +// int64_t s = tsList[0]; +// int64_t e = tsList[size - 1]; + +// if (IS_DATA_BLOCK_LOADED(blockStatus)) { +// dTrace("QInfo:%p query ts:%lld-%lld, offset:%d, rows:%d, bstatus:%d, +// functId:%d", GET_QINFO_ADDR(pQuery), +// s, e, startOffset, size, blockStatus, functionId); +// } else { +// dTrace("QInfo:%p block not loaded, bstatus:%d", +// GET_QINFO_ADDR(pQuery), blockStatus); +// } +#endif +} + +// set the output buffer for the selectivity + tag query +static void setCtxTagColumnInfo(SQuery *pQuery, SQLFunctionCtx *pCtx) { + if (isSelectivityWithTagsQuery(pQuery)) { + int32_t num = 0; + SQLFunctionCtx *p = NULL; + + int16_t tagLen = 0; + + SQLFunctionCtx **pTagCtx = calloc(pQuery->numOfOutputCols, POINTER_BYTES); + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SSqlFuncExprMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].pBase; + if (pSqlFuncMsg->functionId == TSDB_FUNC_TAG_DUMMY || pSqlFuncMsg->functionId == TSDB_FUNC_TS_DUMMY) { + tagLen += pCtx[i].outputBytes; + pTagCtx[num++] = &pCtx[i]; + } else if ((aAggs[pSqlFuncMsg->functionId].nStatus & TSDB_FUNCSTATE_SELECTIVITY) != 0) { + p = &pCtx[i]; + } else if (pSqlFuncMsg->functionId == TSDB_FUNC_TS || pSqlFuncMsg->functionId == TSDB_FUNC_TAG) { + // 
tag function may be the group by tag column + // ts may be the required primary timestamp column + continue; + } else { + // the column may be the normal column, group by normal_column, the functionId is TSDB_FUNC_PRJ + } + } + + p->tagInfo.pTagCtxList = pTagCtx; + p->tagInfo.numOfTagCols = num; + p->tagInfo.tagsLen = tagLen; + } +} + +static void setWindowResultInfo(SResultInfo *pResultInfo, SQuery *pQuery, bool isStableQuery) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + setResultInfoBuf(&pResultInfo[i], pQuery->pSelectExpr[i].interResBytes, isStableQuery); + } +} + +static int32_t setupQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv, SColumnModel *pTagsSchema, int16_t order, bool isSTableQuery) { + dTrace("QInfo:%p setup runtime env", GET_QINFO_ADDR(pRuntimeEnv)); + SQuery* pQuery = pRuntimeEnv->pQuery; + + pRuntimeEnv->resultInfo = calloc(pQuery->numOfOutputCols, sizeof(SResultInfo)); + pRuntimeEnv->pCtx = (SQLFunctionCtx *)calloc(pQuery->numOfOutputCols, sizeof(SQLFunctionCtx)); + + if (pRuntimeEnv->resultInfo == NULL || pRuntimeEnv->pCtx == NULL) { + goto _error_clean; + } + + pRuntimeEnv->offset[0] = 0; + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SSqlFuncExprMsg *pSqlFuncMsg = &pQuery->pSelectExpr[i].pBase; + SColIndexEx * pColIndexEx = &pSqlFuncMsg->colInfo; + + SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; + + if (TSDB_COL_IS_TAG(pSqlFuncMsg->colInfo.flag)) { // process tag column info + SSchema *pSchema = getColumnModelSchema(pTagsSchema, pColIndexEx->colIdx); + + pCtx->inputType = pSchema->type; + pCtx->inputBytes = pSchema->bytes; + } else { + pCtx->inputType = GET_COLUMN_TYPE(pQuery, i); + pCtx->inputBytes = GET_COLUMN_BYTES(pQuery, i); + } + + pCtx->ptsOutputBuf = NULL; + + pCtx->outputBytes = pQuery->pSelectExpr[i].resBytes; + pCtx->outputType = pQuery->pSelectExpr[i].resType; + + pCtx->order = pQuery->order.order; + pCtx->functionId = pSqlFuncMsg->functionId; + + pCtx->numOfParams = pSqlFuncMsg->numOfParams; + for 
(int32_t j = 0; j < pCtx->numOfParams; ++j) { + int16_t type = pSqlFuncMsg->arg[j].argType; + int16_t bytes = pSqlFuncMsg->arg[j].argBytes; + if (type == TSDB_DATA_TYPE_BINARY || type == TSDB_DATA_TYPE_NCHAR) { + tVariantCreateFromBinary(&pCtx->param[j], pSqlFuncMsg->arg->argValue.pz, bytes, type); + } else { + tVariantCreateFromBinary(&pCtx->param[j], (char *)&pSqlFuncMsg->arg[j].argValue.i64, bytes, type); + } + } + + // set the order information for top/bottom query + int32_t functionId = pCtx->functionId; + + if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { + int32_t f = pQuery->pSelectExpr[0].pBase.functionId; + assert(f == TSDB_FUNC_TS || f == TSDB_FUNC_TS_DUMMY); + + pCtx->param[2].i64Key = order; + pCtx->param[2].nType = TSDB_DATA_TYPE_BIGINT; + pCtx->param[3].i64Key = functionId; + pCtx->param[3].nType = TSDB_DATA_TYPE_BIGINT; + + pCtx->param[1].i64Key = pQuery->order.orderColId; + } + + if (i > 0) { + pRuntimeEnv->offset[i] = pRuntimeEnv->offset[i - 1] + pRuntimeEnv->pCtx[i - 1].outputBytes; + } + } + + // set the intermediate result output buffer + setWindowResultInfo(pRuntimeEnv->resultInfo, pQuery, isSTableQuery); + + // if it is group by normal column, do not set output buffer, the output buffer is pResult + if (!isGroupbyNormalCol(pQuery->pGroupbyExpr) && !isSTableQuery) { + resetCtxOutputBuf(pRuntimeEnv); + } + + setCtxTagColumnInfo(pQuery, pRuntimeEnv->pCtx); + return TSDB_CODE_SUCCESS; + +_error_clean: + tfree(pRuntimeEnv->resultInfo); + tfree(pRuntimeEnv->pCtx); + + return TSDB_CODE_SERV_OUT_OF_MEMORY; +} + +static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) { + if (pRuntimeEnv->pQuery == NULL) { + return; + } + + SQuery *pQuery = pRuntimeEnv->pQuery; + + dTrace("QInfo:%p teardown runtime env", GET_QINFO_ADDR(pQuery)); + cleanupTimeWindowInfo(&pRuntimeEnv->windowResInfo, pQuery->numOfOutputCols); + + if (pRuntimeEnv->pCtx != NULL) { + for (int32_t i = 0; i < 
pQuery->numOfOutputCols; ++i) { + SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; + + for (int32_t j = 0; j < pCtx->numOfParams; ++j) { + tVariantDestroy(&pCtx->param[j]); + } + + tVariantDestroy(&pCtx->tag); + tfree(pCtx->tagInfo.pTagCtxList); + tfree(pRuntimeEnv->resultInfo[i].interResultBuf); + } + + tfree(pRuntimeEnv->resultInfo); + tfree(pRuntimeEnv->pCtx); + } + + taosDestoryInterpoInfo(&pRuntimeEnv->interpoInfo); + + if (pRuntimeEnv->pInterpoBuf != NULL) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + tfree(pRuntimeEnv->pInterpoBuf[i]); + } + + tfree(pRuntimeEnv->pInterpoBuf); + } + + destroyResultBuf(pRuntimeEnv->pResultBuf); + pRuntimeEnv->pTSBuf = tsBufDestory(pRuntimeEnv->pTSBuf); +} + +bool isQueryKilled(SQuery *pQuery) { + return false; + + SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); +#if 0 + /* + * check if the queried meter is going to be deleted. + * if it will be deleted soon, stop current query ASAP. + */ + SMeterObj *pMeterObj = pQInfo->pObj; + if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DROPPING)) { + pQInfo->killed = 1; + return true; + } + + return (pQInfo->killed == 1); +#endif + return 0; +} + +bool isFixedOutputQuery(SQuery *pQuery) { + if (pQuery->intervalTime != 0) { + return false; + } + + // Note:top/bottom query is fixed output query + if (isTopBottomQuery(pQuery) || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { + return true; + } + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SSqlFuncExprMsg *pExprMsg = &pQuery->pSelectExpr[i].pBase; + + // ignore the ts_comp function + if (i == 0 && pExprMsg->functionId == TSDB_FUNC_PRJ && pExprMsg->numOfParams == 1 && + pExprMsg->colInfo.colIdx == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + continue; + } + + if (pExprMsg->functionId == TSDB_FUNC_TS || pExprMsg->functionId == TSDB_FUNC_TS_DUMMY) { + continue; + } + + if (!IS_MULTIOUTPUT(aAggs[pExprMsg->functionId].nStatus)) { + return true; + } + } + + return false; +} + +bool isPointInterpoQuery(SQuery *pQuery) { + for 
(int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionID = pQuery->pSelectExpr[i].pBase.functionId; + if (functionID == TSDB_FUNC_INTERP || functionID == TSDB_FUNC_LAST_ROW) { + return true; + } + } + + return false; +} + +// TODO REFACTOR:MERGE WITH CLIENT-SIDE FUNCTION +bool isSumAvgRateQuery(SQuery *pQuery) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + if (functionId == TSDB_FUNC_TS) { + continue; + } + + if (functionId == TSDB_FUNC_SUM_RATE || functionId == TSDB_FUNC_SUM_IRATE || functionId == TSDB_FUNC_AVG_RATE || + functionId == TSDB_FUNC_AVG_IRATE) { + return true; + } + } + + return false; +} + +bool isFirstLastRowQuery(SQuery *pQuery) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionID = pQuery->pSelectExpr[i].pBase.functionId; + if (functionID == TSDB_FUNC_LAST_ROW) { + return true; + } + } + + return false; +} + +bool notHasQueryTimeRange(SQuery *pQuery) { + return (pQuery->window.skey == 0 && pQuery->window.ekey == INT64_MAX && QUERY_IS_ASC_QUERY(pQuery)) || + (pQuery->window.skey == INT64_MAX && pQuery->window.ekey == 0 && (!QUERY_IS_ASC_QUERY(pQuery))); +} + +bool needSupplementaryScan(SQuery *pQuery) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG) { + continue; + } + + if (((functionId == TSDB_FUNC_LAST || functionId == TSDB_FUNC_LAST_DST) && QUERY_IS_ASC_QUERY(pQuery)) || + ((functionId == TSDB_FUNC_FIRST || functionId == TSDB_FUNC_FIRST_DST) && !QUERY_IS_ASC_QUERY(pQuery))) { + return true; + } + } + + return false; +} +///////////////////////////////////////////////////////////////////////////////////////////// + +void doGetAlignedIntervalQueryRangeImpl(SQuery *pQuery, int64_t key, int64_t keyFirst, int64_t keyLast, + int64_t *realSkey, int64_t 
*realEkey, STimeWindow *win) { + assert(key >= keyFirst && key <= keyLast && pQuery->slidingTime <= pQuery->intervalTime); + + win->skey = taosGetIntervalStartTimestamp(key, pQuery->slidingTime, pQuery->slidingTimeUnit, pQuery->precision); + + if (keyFirst > (INT64_MAX - pQuery->intervalTime)) { + /* + * if the realSkey > INT64_MAX - pQuery->intervalTime, the query duration between + * realSkey and realEkey must be less than one interval.Therefore, no need to adjust the query ranges. + */ + assert(keyLast - keyFirst < pQuery->intervalTime); + + *realSkey = keyFirst; + *realEkey = keyLast; + + win->ekey = INT64_MAX; + return; + } + + win->ekey = win->skey + pQuery->intervalTime - 1; + + if (win->skey < keyFirst) { + *realSkey = keyFirst; + } else { + *realSkey = win->skey; + } + + if (win->ekey < keyLast) { + *realEkey = win->ekey; + } else { + *realEkey = keyLast; + } +} + +static bool doGetQueryPos(TSKEY key, SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupporter) { +#if 0 + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + SMeterObj * pMeterObj = pRuntimeEnv->pTabObj; + + /* key in query range. 
If not, no qualified in disk file */ + if (key != -1 && key <= pQuery->window.ekey) { + if (isPointInterpoQuery(pQuery)) { /* no qualified data in this query range */ + return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter); + } else { + return true; + } + } else { // key > pQuery->window.ekey, abort for normal query, continue for interp query + if (isPointInterpoQuery(pQuery)) { + return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter); + } else { + return false; + } + } +#endif +} + +static bool doSetDataInfo(SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupporter, void *pMeterObj, + TSKEY nextKey) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + if (isFirstLastRowQuery(pQuery)) { + /* + * if the pQuery->window.skey != pQuery->window.ekey for last_row query, + * the query range is existed, so set them both the value of nextKey + */ + if (pQuery->window.skey != pQuery->window.ekey) { + assert(pQuery->window.skey >= pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery) && + nextKey >= pQuery->window.ekey && nextKey <= pQuery->window.skey); + + pQuery->window.skey = nextKey; + pQuery->window.ekey = nextKey; + } + + return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter); + } else { + return true; + } +} + +// TODO refactor code, the best way to implement the last_row is utilizing the iterator +bool normalizeUnBoundLastRowQuery(SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupporter) { +#if 0 + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + + SQuery * pQuery = pRuntimeEnv->pQuery; + SMeterObj *pMeterObj = pRuntimeEnv->pTabObj; + + assert(!QUERY_IS_ASC_QUERY(pQuery) && notHasQueryTimeRange(pQuery)); + __block_search_fn_t searchFn = vnodeSearchKeyFunc[pMeterObj->searchAlgorithm]; + + TSKEY lastKey = -1; + + pQuery->fileId = -1; + vnodeFreeFieldsEx(pRuntimeEnv); + + // keep in-memory cache status in local variables in case that it may be changed by write operation + 
getBasicCacheInfoSnapshot(pQuery, pMeterObj->pCache, pMeterObj->vnode); + + SCacheInfo *pCacheInfo = (SCacheInfo *)pMeterObj->pCache; + if (pCacheInfo != NULL && pCacheInfo->cacheBlocks != NULL && pQuery->numOfBlocks > 0) { + pQuery->fileId = -1; + TSKEY key = pMeterObj->lastKey; + + pQuery->window.skey = key; + pQuery->window.ekey = key; + pQuery->lastKey = pQuery->window.skey; + + /* + * cache block may have been flushed to disk, and no data in cache anymore. + * So, copy cache block to local buffer is required. + */ + lastKey = getQueryStartPositionInCache(pRuntimeEnv, &pQuery->slot, &pQuery->pos, false); + if (lastKey < 0) { // data has been flushed to disk, try again search in file + lastKey = getQueryPositionForCacheInvalid(pRuntimeEnv, searchFn); + + if (Q_STATUS_EQUAL(pQuery->status, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { + return false; + } + } + } else { // no data in cache, try file + TSKEY key = pMeterObj->lastKeyOnFile; + + pQuery->window.skey = key; + pQuery->window.ekey = key; + pQuery->lastKey = pQuery->window.skey; + + bool ret = getQualifiedDataBlock(pMeterObj, pRuntimeEnv, QUERY_RANGE_LESS_EQUAL, searchFn); + if (!ret) { // no data in file, return false; + return false; + } + + lastKey = getTimestampInDiskBlock(pRuntimeEnv, pQuery->pos); + } + + assert(lastKey <= pQuery->window.skey); + + pQuery->window.skey = lastKey; + pQuery->window.ekey = lastKey; + pQuery->lastKey = pQuery->window.skey; + + return getNeighborPoints(pQInfo, pMeterObj, pPointInterpSupporter); +#endif + + return true; +} + +static void setScanLimitationByResultBuffer(SQuery *pQuery) { + if (isTopBottomQuery(pQuery)) { + pQuery->checkBufferInLoop = 0; + } else if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { + pQuery->checkBufferInLoop = 0; + } else { + bool hasMultioutput = false; + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SSqlFuncExprMsg *pExprMsg = &pQuery->pSelectExpr[i].pBase; + if (pExprMsg->functionId == TSDB_FUNC_TS || pExprMsg->functionId == 
TSDB_FUNC_TS_DUMMY) { + continue; + } + + hasMultioutput = IS_MULTIOUTPUT(aAggs[pExprMsg->functionId].nStatus); + if (!hasMultioutput) { + break; + } + } + + pQuery->checkBufferInLoop = hasMultioutput ? 1 : 0; + } + + // pQuery->pointsOffset = pQuery->pointsToRead; +} + +/* + * todo add more parameters to check soon.. + */ +bool vnodeParametersSafetyCheck(SQuery *pQuery) { + // load data column information is incorrect + for (int32_t i = 0; i < pQuery->numOfCols - 1; ++i) { + if (pQuery->colList[i].info.colId == pQuery->colList[i + 1].info.colId) { + dError("QInfo:%p invalid data load column for query", GET_QINFO_ADDR(pQuery)); + return false; + } + } + return true; +} + +// todo ignore the avg/sum/min/max/count/stddev/top/bottom functions, of which +// the scan order is not matter +static bool onlyOneQueryType(SQuery *pQuery, int32_t functId, int32_t functIdDst) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + + if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TS_DUMMY || functionId == TSDB_FUNC_TAG || + functionId == TSDB_FUNC_TAG_DUMMY) { + continue; + } + + if (functionId != functId && functionId != functIdDst) { + return false; + } + } + + return true; +} + +static bool onlyFirstQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_FIRST, TSDB_FUNC_FIRST_DST); } + +static bool onlyLastQuery(SQuery *pQuery) { return onlyOneQueryType(pQuery, TSDB_FUNC_LAST, TSDB_FUNC_LAST_DST); } + +static void changeExecuteScanOrder(SQuery *pQuery, bool metricQuery) { + // in case of point-interpolation query, use asc order scan + char msg[] = "QInfo:%p scan order changed for %s query, old:%d, new:%d, qrange exchanged, old qrange:%" PRId64 + "-%" PRId64 ", new qrange:%" PRId64 "-%" PRId64; + + // todo handle the case the the order irrelevant query type mixed up with order critical query type + // descending order query for last_row query + if (isFirstLastRowQuery(pQuery)) { + 
dTrace("QInfo:%p scan order changed for last_row query, old:%d, new:%d", GET_QINFO_ADDR(pQuery), + pQuery->order.order, TSQL_SO_DESC); + + pQuery->order.order = TSQL_SO_DESC; + + int64_t skey = MIN(pQuery->window.skey, pQuery->window.ekey); + int64_t ekey = MAX(pQuery->window.skey, pQuery->window.ekey); + + pQuery->window.skey = ekey; + pQuery->window.ekey = skey; + + return; + } + + if (isPointInterpoQuery(pQuery) && pQuery->intervalTime == 0) { + if (!QUERY_IS_ASC_QUERY(pQuery)) { + dTrace(msg, GET_QINFO_ADDR(pQuery), "interp", pQuery->order.order, TSQL_SO_ASC, pQuery->window.skey, + pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey); + SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY); + } + + pQuery->order.order = TSQL_SO_ASC; + return; + } + + if (pQuery->intervalTime == 0) { + if (onlyFirstQuery(pQuery)) { + if (!QUERY_IS_ASC_QUERY(pQuery)) { + dTrace(msg, GET_QINFO_ADDR(pQuery), "only-first", pQuery->order.order, TSQL_SO_ASC, pQuery->window.skey, + pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey); + + SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY); + } + + pQuery->order.order = TSQL_SO_ASC; + } else if (onlyLastQuery(pQuery)) { + if (QUERY_IS_ASC_QUERY(pQuery)) { + dTrace(msg, GET_QINFO_ADDR(pQuery), "only-last", pQuery->order.order, TSQL_SO_DESC, pQuery->window.skey, + pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey); + + SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY); + } + + pQuery->order.order = TSQL_SO_DESC; + } + + } else { // interval query + if (metricQuery) { + if (onlyFirstQuery(pQuery)) { + if (!QUERY_IS_ASC_QUERY(pQuery)) { + dTrace(msg, GET_QINFO_ADDR(pQuery), "only-first stable", pQuery->order.order, TSQL_SO_ASC, + pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey); + + SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY); + } + + pQuery->order.order = TSQL_SO_ASC; + } else if (onlyLastQuery(pQuery)) { + if (QUERY_IS_ASC_QUERY(pQuery)) { + dTrace(msg, 
GET_QINFO_ADDR(pQuery), "only-last stable", pQuery->order.order, TSQL_SO_DESC, + pQuery->window.skey, pQuery->window.ekey, pQuery->window.ekey, pQuery->window.skey); + + SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY); + } + + pQuery->order.order = TSQL_SO_DESC; + } + } + } +} + +static void doSetInterpVal(SQLFunctionCtx *pCtx, TSKEY ts, int16_t type, int32_t index, char *data) { + assert(pCtx->param[index].pz == NULL); + + int32_t len = 0; + size_t t = 0; + + if (type == TSDB_DATA_TYPE_BINARY) { + t = strlen(data); + + len = t + 1 + TSDB_KEYSIZE; + pCtx->param[index].pz = calloc(1, len); + } else if (type == TSDB_DATA_TYPE_NCHAR) { + t = wcslen((const wchar_t *)data); + + len = (t + 1) * TSDB_NCHAR_SIZE + TSDB_KEYSIZE; + pCtx->param[index].pz = calloc(1, len); + } else { + len = TSDB_KEYSIZE * 2; + pCtx->param[index].pz = malloc(len); + } + + pCtx->param[index].nType = TSDB_DATA_TYPE_BINARY; + + char *z = pCtx->param[index].pz; + *(TSKEY *)z = ts; + z += TSDB_KEYSIZE; + + switch (type) { + case TSDB_DATA_TYPE_FLOAT: + *(double *)z = GET_FLOAT_VAL(data); + break; + case TSDB_DATA_TYPE_DOUBLE: + *(double *)z = GET_DOUBLE_VAL(data); + break; + case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_BOOL: + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_SMALLINT: + case TSDB_DATA_TYPE_TIMESTAMP: + *(int64_t *)z = GET_INT64_VAL(data); + break; + case TSDB_DATA_TYPE_BINARY: + strncpy(z, data, t); + break; + case TSDB_DATA_TYPE_NCHAR: { + wcsncpy((wchar_t *)z, (const wchar_t *)data, t); + } break; + default: + assert(0); + } + + pCtx->param[index].nLen = len; +} + +/** + * param[1]: default value/previous value of specified timestamp + * param[2]: next value of specified timestamp + * param[3]: denotes if the result is a precious result or interpolation results + * + * @param pQInfo + * @param pQInfo + * @param pInterpoRaw + */ +void pointInterpSupporterSetData(SQInfo *pQInfo, SPointInterpoSupporter *pPointInterpSupport) { + 
SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + // not point interpolation query, abort + if (!isPointInterpoQuery(pQuery)) { + return; + } + + int32_t count = 1; + TSKEY key = *(TSKEY *)pPointInterpSupport->pNextPoint[0]; + + if (key == pQuery->window.skey) { + // the queried timestamp has value, return it directly without interpolation + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + tVariantCreateFromBinary(&pRuntimeEnv->pCtx[i].param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT); + + pRuntimeEnv->pCtx[i].param[0].i64Key = key; + pRuntimeEnv->pCtx[i].param[0].nType = TSDB_DATA_TYPE_BIGINT; + } + } else { + // set the direct previous(next) point for process + count = 2; + + if (pQuery->interpoType == TSDB_INTERPO_SET_VALUE) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; + + // only the function of interp needs the corresponding information + if (pCtx->functionId != TSDB_FUNC_INTERP) { + continue; + } + + pCtx->numOfParams = 4; + + SInterpInfo *pInterpInfo = (SInterpInfo *)pRuntimeEnv->pCtx[i].aOutputBuf; + pInterpInfo->pInterpDetail = calloc(1, sizeof(SInterpInfoDetail)); + + SInterpInfoDetail *pInterpDetail = pInterpInfo->pInterpDetail; + + // for primary timestamp column, set the flag + if (pQuery->pSelectExpr[i].pBase.colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + pInterpDetail->primaryCol = 1; + } + + tVariantCreateFromBinary(&pCtx->param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT); + + if (isNull((char *)&pQuery->defaultVal[i], pCtx->inputType)) { + pCtx->param[1].nType = TSDB_DATA_TYPE_NULL; + } else { + tVariantCreateFromBinary(&pCtx->param[1], (char *)&pQuery->defaultVal[i], pCtx->inputBytes, pCtx->inputType); + } + + pInterpDetail->ts = pQuery->window.skey; + pInterpDetail->type = pQuery->interpoType; + } + } else { + TSKEY prevKey = *(TSKEY *)pPointInterpSupport->pPrevPoint[0]; + TSKEY nextKey = *(TSKEY 
*)pPointInterpSupport->pNextPoint[0]; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; + + // tag column does not need the interp environment + if (pQuery->pSelectExpr[i].pBase.functionId == TSDB_FUNC_TAG) { + continue; + } + + int32_t colInBuf = pQuery->pSelectExpr[i].pBase.colInfo.colIdxInBuf; + + SInterpInfo *pInterpInfo = (SInterpInfo *)pRuntimeEnv->pCtx[i].aOutputBuf; + + pInterpInfo->pInterpDetail = calloc(1, sizeof(SInterpInfoDetail)); + SInterpInfoDetail *pInterpDetail = pInterpInfo->pInterpDetail; + + // int32_t type = GET_COLUMN_TYPE(pQuery, i); + int32_t type = 0; + assert(0); + + // for primary timestamp column, set the flag + if (pQuery->pSelectExpr[i].pBase.colInfo.colId == PRIMARYKEY_TIMESTAMP_COL_INDEX) { + pInterpDetail->primaryCol = 1; + } else { + doSetInterpVal(pCtx, prevKey, type, 1, pPointInterpSupport->pPrevPoint[colInBuf]); + doSetInterpVal(pCtx, nextKey, type, 2, pPointInterpSupport->pNextPoint[colInBuf]); + } + + tVariantCreateFromBinary(&pRuntimeEnv->pCtx[i].param[3], (char *)&count, sizeof(count), TSDB_DATA_TYPE_INT); + + pInterpDetail->ts = pQInfo->runtimeEnv.pQuery->window.skey; + pInterpDetail->type = pQuery->interpoType; + } + } + } +} + +void pointInterpSupporterInit(SQuery *pQuery, SPointInterpoSupporter *pInterpoSupport) { + if (isPointInterpoQuery(pQuery)) { + pInterpoSupport->pPrevPoint = malloc(pQuery->numOfCols * POINTER_BYTES); + pInterpoSupport->pNextPoint = malloc(pQuery->numOfCols * POINTER_BYTES); + + pInterpoSupport->numOfCols = pQuery->numOfCols; + + /* get appropriated size for one row data source*/ + int32_t len = 0; + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { + len += pQuery->colList[i].info.bytes; + } + + // assert(PRIMARY_TSCOL_LOADED(pQuery)); + + void *prev = calloc(1, len); + void *next = calloc(1, len); + + int32_t offset = 0; + + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { + pInterpoSupport->pPrevPoint[i] = prev + offset; + 
pInterpoSupport->pNextPoint[i] = next + offset; + + offset += pQuery->colList[i].info.bytes; + } + } +} + +void pointInterpSupporterDestroy(SPointInterpoSupporter *pPointInterpSupport) { + if (pPointInterpSupport->numOfCols <= 0 || pPointInterpSupport->pPrevPoint == NULL) { + return; + } + + tfree(pPointInterpSupport->pPrevPoint[0]); + tfree(pPointInterpSupport->pNextPoint[0]); + + tfree(pPointInterpSupport->pPrevPoint); + tfree(pPointInterpSupport->pNextPoint); + + pPointInterpSupport->numOfCols = 0; +} + +static void allocMemForInterpo(SQInfo *pQInfo, SQuery *pQuery, void *pMeterObj) { +#if 0 + if (pQuery->interpoType != TSDB_INTERPO_NONE) { + assert(isIntervalQuery(pQuery) || (pQuery->intervalTime == 0 && isPointInterpoQuery(pQuery))); + + if (isIntervalQuery(pQuery)) { + pQInfo->runtimeEnv.pInterpoBuf = malloc(POINTER_BYTES * pQuery->numOfOutputCols); + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + pQInfo->runtimeEnv.pInterpoBuf[i] = + calloc(1, sizeof(tFilePage) + pQuery->pSelectExpr[i].resBytes * pMeterObj->pointsPerFileBlock); + } + } + } +#endif +} + +static int32_t getInitialPageNum(SQInfo *pQInfo) { + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + int32_t INITIAL_RESULT_ROWS_VALUE = 16; + + int32_t num = 0; + + if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { + num = 128; + } else if (isIntervalQuery(pQuery)) { // time window query, allocate one page for each table + size_t s = taosArrayGetSize(pQInfo->pTableIdList); + num = MAX(s, INITIAL_RESULT_ROWS_VALUE); + } else { // for super table query, one page for each subset +// num = pQInfo->pSidSet->numOfSubSet; + } + + assert(num > 0); + return num; +} + +static int32_t getRowParamForMultiRowsOutput(SQuery *pQuery, bool isSTableQuery) { + int32_t rowparam = 1; + + if (isTopBottomQuery(pQuery) && (!isSTableQuery)) { + rowparam = pQuery->pSelectExpr[1].pBase.arg->argValue.i64; + } + + return rowparam; +} + +static int32_t getNumOfRowsInResultPage(SQuery *pQuery, bool isSTableQuery) { + int32_t 
rowSize = pQuery->rowSize * getRowParamForMultiRowsOutput(pQuery, isSTableQuery); + return (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / rowSize; +} + +char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) { + assert(pResult != NULL && pRuntimeEnv != NULL); + + SQuery * pQuery = pRuntimeEnv->pQuery; + tFilePage *page = getResultBufferPageById(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + + int32_t numOfRows = getNumOfRowsInResultPage(pQuery, pRuntimeEnv->stableQuery); + int32_t realRowId = pResult->pos.rowId * getRowParamForMultiRowsOutput(pQuery, pRuntimeEnv->stableQuery); + + return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * numOfRows + + pQuery->pSelectExpr[columnIndex].resBytes * realRowId; +} + +void vnodeQueryFreeQInfoEx(SQInfo *pQInfo) { + if (pQInfo == NULL) { + return; + } + + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + + teardownQueryRuntimeEnv(&pQInfo->runtimeEnv); + + // tSidSetDestroy(&pQInfo->pSidSet); + + if (pQInfo->pTableDataInfo != NULL) { +// size_t num = taosHashGetSize(pQInfo->pTableIdList); + for (int32_t j = 0; j < 0; ++j) { + destroyMeterQueryInfo(pQInfo->pTableDataInfo[j].pTableQInfo, pQuery->numOfOutputCols); + } + } + + tfree(pQInfo->pTableDataInfo); +} + +int32_t vnodeSTableQueryPrepare(SQInfo *pQInfo, SQuery *pQuery, void *param) { + if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) || + (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) { + dTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey, + pQuery->window.ekey, pQuery->order.order); + + sem_post(&pQInfo->dataReady); + // pQInfo->over = 1; + + return TSDB_CODE_SUCCESS; + } + + pQuery->status = 0; + + pQInfo->rec = (SResultRec){0}; + pQuery->rec = (SResultRec){0}; + + changeExecuteScanOrder(pQuery, true); + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + + /* + * since we employ the output control 
mechanism in main loop. + * so, disable it during data block scan procedure. + */ + setScanLimitationByResultBuffer(pQuery); + + // save raw query range for applying to each subgroup + pQuery->lastKey = pQuery->window.skey; + + // create runtime environment +// SColumnModel *pTagSchemaInfo = pQInfo->pSidSet->pColumnModel; + + // get one queried meter + assert(0); + // SMeterObj *pMeter = getMeterObj(pQInfo->pTableIdList, pQInfo->pSidSet->pTableIdList[0]->sid); + + pRuntimeEnv->pTSBuf = param; + pRuntimeEnv->cur.vnodeIndex = -1; + + // set the ts-comp file traverse order + if (param != NULL) { + int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? TSQL_SO_ASC : TSQL_SO_DESC; + tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order); + } + + assert(0); + // int32_t ret = setupQueryRuntimeEnv(pMeter, pQuery, &pQInfo->runtimeEnv, pTagSchemaInfo, TSQL_SO_ASC, true); + // if (ret != TSDB_CODE_SUCCESS) { + // return ret; + // } + + // tSidSetSort(pQInfo->pSidSet); + + int32_t size = getInitialPageNum(pQInfo); + int32_t ret = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, size, pQuery->rowSize); + if (ret != TSDB_CODE_SUCCESS) { + return ret; + } + + if (pQuery->intervalTime == 0) { + int16_t type = TSDB_DATA_TYPE_NULL; + + if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // group by columns not tags; + type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr); + } else { + type = TSDB_DATA_TYPE_INT; // group id + } + + initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, 512, 4096, type); + } + + pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, true); + + STsdbQueryCond cond = {0}; + cond.twindow = (STimeWindow){.skey = pQuery->window.skey, .ekey = pQuery->window.ekey}; + cond.order = pQuery->order.order; + + cond.colList = *pQuery->colList; + SArray *sa = taosArrayInit(1, POINTER_BYTES); + + // for(int32_t i = 0; i < pQInfo->pSidSet->numOfTables; ++i) { + // SMeterObj *p1 = getMeterObj(pQInfo->pTableIdList, 
pQInfo->pSidSet->pTableIdList[i]->sid); + // taosArrayPush(sa, &p1); + // } + + SArray *cols = taosArrayInit(pQuery->numOfCols, sizeof(pQuery->colList[0])); + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { + taosArrayPush(cols, &pQuery->colList[i]); + } + + pRuntimeEnv->pQueryHandle = tsdbQueryByTableId(&cond, sa, cols); + + // metric query do not invoke interpolation, it will be done at the second-stage merge + if (!isPointInterpoQuery(pQuery)) { + pQuery->interpoType = TSDB_INTERPO_NONE; + } + + TSKEY revisedStime = taosGetIntervalStartTimestamp(pQuery->window.skey, pQuery->intervalTime, pQuery->slidingTimeUnit, + pQuery->precision); + taosInitInterpoInfo(&pRuntimeEnv->interpoInfo, pQuery->order.order, revisedStime, 0, 0); + pRuntimeEnv->stableQuery = true; + + return TSDB_CODE_SUCCESS; +} + +/** + * decrease the refcount for each table involved in this query + * @param pQInfo + */ +void vnodeDecMeterRefcnt(SQInfo *pQInfo) { + if (pQInfo != NULL) { +// assert(taosHashGetSize(pQInfo->pTableIdList) >= 1); + } + +#if 0 + if (pQInfo == NULL || pQInfo->numOfMeters == 1) { + atomic_fetch_sub_32(&pQInfo->pObj->numOfQueries, 1); + dTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pQInfo->pObj->vnode, + pQInfo->pObj->sid, pQInfo->pObj->meterId, pQInfo->pObj->numOfQueries); + } else { + int32_t num = 0; + for (int32_t i = 0; i < pQInfo->numOfMeters; ++i) { + SMeterObj *pMeter = getMeterObj(pQInfo->pTableIdList, pQInfo->pSidSet->pTableIdList[i]->sid); + atomic_fetch_sub_32(&(pMeter->numOfQueries), 1); + + if (pMeter->numOfQueries > 0) { + dTrace("QInfo:%p vid:%d sid:%d meterId:%s, query is over, numOfQueries:%d", pQInfo, pMeter->vnode, pMeter->sid, + pMeter->meterId, pMeter->numOfQueries); + num++; + } + } + + /* + * in order to reduce log output, for all meters of which numOfQueries count are 0, + * we do not output corresponding information + */ + num = pQInfo->numOfMeters - num; + dTrace("QInfo:%p metric query is over, dec query ref 
for %d meters, numOfQueries on %d meters are 0", pQInfo, + pQInfo->numOfMeters, num); + } +#endif +} + +void setTimestampRange(SQueryRuntimeEnv *pRuntimeEnv, int64_t stime, int64_t etime) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + + if (functionId == TSDB_FUNC_SPREAD) { + pRuntimeEnv->pCtx[i].param[1].dKey = stime; + pRuntimeEnv->pCtx[i].param[2].dKey = etime; + + pRuntimeEnv->pCtx[i].param[1].nType = TSDB_DATA_TYPE_DOUBLE; + pRuntimeEnv->pCtx[i].param[2].nType = TSDB_DATA_TYPE_DOUBLE; + } + } +} + +static bool needToLoadDataBlock(SQuery *pQuery, SDataStatis *pDataStatis, SQLFunctionCtx *pCtx, + int32_t numOfTotalPoints) { + if (pDataStatis == NULL) { + return true; + } + +#if 0 + for (int32_t k = 0; k < pQuery->numOfFilterCols; ++k) { + SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[k]; + int32_t colIndex = pFilterInfo->info.colIdx; + + // this column not valid in current data block + if (colIndex < 0 || pDataStatis[colIndex].colId != pFilterInfo->info.data.colId) { + continue; + } + + // not support pre-filter operation on binary/nchar data type + if (!vnodeSupportPrefilter(pFilterInfo->info.data.type)) { + continue; + } + + // all points in current column are NULL, no need to check its boundary value + if (pDataStatis[colIndex].numOfNull == numOfTotalPoints) { + continue; + } + + if (pFilterInfo->info.info.type == TSDB_DATA_TYPE_FLOAT) { + float minval = *(double *)(&pDataStatis[colIndex].min); + float maxval = *(double *)(&pDataStatis[colIndex].max); + + for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) { + if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&minval, (char *)&maxval)) { + return true; + } + } + } else { + for (int32_t i = 0; i < pFilterInfo->numOfFilters; ++i) { + if (pFilterInfo->pFilters[i].fp(&pFilterInfo->pFilters[i], (char *)&pDataStatis[colIndex].min, + (char 
*)&pDataStatis[colIndex].max)) { + return true; + } + } + } + } + + // todo disable this opt code block temporarily + // for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + // int32_t functId = pQuery->pSelectExpr[i].pBase.functionId; + // if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) { + // return top_bot_datablock_filter(&pCtx[i], functId, (char *)&pField[i].min, (char *)&pField[i].max); + // } + // } + +#endif + return true; +} + +// previous time window may not be of the same size of pQuery->intervalTime +static void getNextTimeWindow(SQuery *pQuery, STimeWindow *pTimeWindow) { + int32_t factor = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); + + pTimeWindow->skey += (pQuery->slidingTime * factor); + pTimeWindow->ekey = pTimeWindow->skey + (pQuery->intervalTime - 1); +} + +SArray *loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, SDataBlockInfo *pBlockInfo, SDataStatis **pStatis) { + SQuery * pQuery = pRuntimeEnv->pQuery; + tsdb_query_handle_t pQueryHandle = pRuntimeEnv->pQueryHandle; + + uint32_t r = 0; + SArray * pDataBlock = NULL; + + // STimeWindow *w = &pQueryHandle->window; + + if (pQuery->numOfFilterCols > 0) { + r = BLK_DATA_ALL_NEEDED; + } else { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + int32_t colId = pQuery->pSelectExpr[i].pBase.colInfo.colId; + + // r |= aAggs[functionId].dataReqFunc(&pRuntimeEnv->pCtx[i], w->skey, w->ekey, colId); + } + + if (pRuntimeEnv->pTSBuf > 0 || isIntervalQuery(pQuery)) { + r |= BLK_DATA_ALL_NEEDED; + } + } + + if (r == BLK_DATA_NO_NEEDED) { + // qTrace("QInfo:%p vid:%d sid:%d id:%s, slot:%d, data block ignored, brange:%" PRId64 "-%" PRId64 ", + // rows:%d", GET_QINFO_ADDR(pQuery), pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->slot, + // pBlock->keyFirst, pBlock->keyLast, pBlock->numOfPoints); + } else if (r == BLK_DATA_FILEDS_NEEDED) { + if (tsdbRetrieveDataBlockStatisInfo(pRuntimeEnv->pQueryHandle, 
pStatis) != TSDB_CODE_SUCCESS) { + // return DISK_DATA_LOAD_FAILED; + } + + if (pStatis == NULL) { + pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL); + } + } else { + assert(r == BLK_DATA_ALL_NEEDED); + if (tsdbRetrieveDataBlockStatisInfo(pRuntimeEnv->pQueryHandle, pStatis) != TSDB_CODE_SUCCESS) { + // return DISK_DATA_LOAD_FAILED; + } + + /* + * if this block is completed included in the query range, do more filter operation + * filter the data block according to the value filter condition. + * no need to load the data block, continue for next block + */ + if (!needToLoadDataBlock(pQuery, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->size)) { +#if defined(_DEBUG_VIEW) + dTrace("QInfo:%p fileId:%d, slot:%d, block discarded by per-filter", GET_QINFO_ADDR(pQuery), pQuery->fileId, + pQuery->slot); +#endif + // return DISK_DATA_DISCARDED; + } + + pDataBlock = tsdbRetrieveDataBlock(pRuntimeEnv->pQueryHandle, NULL); + } + + return pDataBlock; +} + +int32_t binarySearchForKey(char *pValue, int num, TSKEY key, int order) { + int firstPos, lastPos, midPos = -1; + int numOfPoints; + TSKEY *keyList; + + if (num <= 0) return -1; + + keyList = (TSKEY *)pValue; + firstPos = 0; + lastPos = num - 1; + + if (order == 0) { + // find the first position which is smaller than the key + while (1) { + if (key >= keyList[lastPos]) return lastPos; + if (key == keyList[firstPos]) return firstPos; + if (key < keyList[firstPos]) return firstPos - 1; + + numOfPoints = lastPos - firstPos + 1; + midPos = (numOfPoints >> 1) + firstPos; + + if (key < keyList[midPos]) { + lastPos = midPos - 1; + } else if (key > keyList[midPos]) { + firstPos = midPos + 1; + } else { + break; + } + } + + } else { + // find the first position which is bigger than the key + while (1) { + if (key <= keyList[firstPos]) return firstPos; + if (key == keyList[lastPos]) return lastPos; + + if (key > keyList[lastPos]) { + lastPos = lastPos + 1; + if (lastPos >= num) + return -1; + else + return lastPos; + } + 
+ numOfPoints = lastPos - firstPos + 1; + midPos = (numOfPoints >> 1) + firstPos; + + if (key < keyList[midPos]) { + lastPos = midPos - 1; + } else if (key > keyList[midPos]) { + firstPos = midPos + 1; + } else { + break; + } + } + } + + return midPos; +} + +static int64_t doScanAllDataBlocks(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + int64_t cnt = 0; + dTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", lastkey:%" PRId64 ", order:%d", + GET_QINFO_ADDR(pRuntimeEnv), pQuery->window.skey, pQuery->window.ekey, pQuery->lastKey, pQuery->order.order); + + tsdb_query_handle_t pQueryHandle = pRuntimeEnv->pQueryHandle; + + while (tsdbNextDataBlock(pQueryHandle)) { + // check if query is killed or not set the status of query to pass the status check + if (isQueryKilled(pQuery)) { + setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); + return cnt; + } + + SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle); + + if (isIntervalQuery(pQuery) && pRuntimeEnv->windowResInfo.prevSKey == 0) { + TSKEY skey1, ekey1; + STimeWindow w = {0}; + SWindowResInfo* pWindowResInfo = &pRuntimeEnv->windowResInfo; + + if (QUERY_IS_ASC_QUERY(pQuery)) { + doGetAlignedIntervalQueryRangeImpl(pQuery, blockInfo.window.skey, blockInfo.window.skey, pQuery->window.ekey, &skey1, &ekey1, &w); + pWindowResInfo->startTime = w.skey; + pWindowResInfo->prevSKey = w.skey; + } else { + // the start position of the first time window in the endpoint that spreads beyond the queried last timestamp + TSKEY winStart = blockInfo.window.ekey - pQuery->intervalTime; + doGetAlignedIntervalQueryRangeImpl(pQuery, winStart, pQuery->window.ekey, blockInfo.window.ekey, &skey1, &ekey1, &w); + + pWindowResInfo->startTime = pQuery->window.skey; + pWindowResInfo->prevSKey = w.skey; + } + } + + int32_t numOfRes = 0; + + SDataStatis *pStatis = NULL; + SArray *pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, &blockInfo, &pStatis); + int32_t forwardStep = 
tableApplyFunctionsOnBlock(pRuntimeEnv, &blockInfo, pStatis, binarySearchForKey, &numOfRes, + &pRuntimeEnv->windowResInfo, pDataBlock); + +// dTrace("QInfo:%p check data block, brange:%" PRId64 "-%" PRId64 ", fileId:%d, slot:%d, pos:%d, rows:%d, checked:%d", +// GET_QINFO_ADDR(pQuery), blockInfo.window.skey, blockInfo.window.ekey, pQueryHandle->cur.fileId, pQueryHandle->cur.slot, +// pQuery->pos, blockInfo.size, forwardStep); + + // save last access position + cnt += forwardStep; + if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) { + break; + } + } + + // if the result buffer is not full, set the query completed flag + if (!Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) { + setQueryStatus(pQuery, QUERY_COMPLETED); + } + + if (isIntervalQuery(pQuery) && IS_MASTER_SCAN(pRuntimeEnv)) { + if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { + int32_t step = QUERY_IS_ASC_QUERY(pQuery)? QUERY_ASC_FORWARD_STEP:QUERY_DESC_FORWARD_STEP; + + closeAllTimeWindow(&pRuntimeEnv->windowResInfo); + removeRedundantWindow(&pRuntimeEnv->windowResInfo, pQuery->lastKey - step, step); + pRuntimeEnv->windowResInfo.curIndex = pRuntimeEnv->windowResInfo.size - 1; + } else { + assert(Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)); + } + } + + return cnt; +} + +static void updatelastkey(SQuery *pQuery, STableQueryInfo *pTableQInfo) { pTableQInfo->lastKey = pQuery->lastKey; } + +/* + * set tag value in SQLFunctionCtx + * e.g.,tag information into input buffer + */ +static void doSetTagValueInParam(SColumnModel *pTagSchema, int32_t tagColIdx, void *pMeterSidInfo, tVariant *param) { + assert(tagColIdx >= 0); +#if 0 + int16_t offset = getColumnModelOffset(pTagSchema, tagColIdx); + + void * pStr = (char *)pMeterSidInfo->tags + offset; + SSchema *pCol = getColumnModelSchema(pTagSchema, tagColIdx); + + tVariantDestroy(param); + + if (isNull(pStr, pCol->type)) { + param->nType = TSDB_DATA_TYPE_NULL; + } else { + tVariantCreateFromBinary(param, pStr, 
pCol->bytes, pCol->type); + } +#endif +} + +void vnodeSetTagValueInParam(tSidSet *pSidSet, SQueryRuntimeEnv *pRuntimeEnv, void *pMeterSidInfo) { + SQuery * pQuery = pRuntimeEnv->pQuery; + SColumnModel *pTagSchema = pSidSet->pColumnModel; + + SSqlFuncExprMsg *pFuncMsg = &pQuery->pSelectExpr[0].pBase; + if (pQuery->numOfOutputCols == 1 && pFuncMsg->functionId == TSDB_FUNC_TS_COMP) { + assert(pFuncMsg->numOfParams == 1); + doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag); + } else { + // set tag value, by which the results are aggregated. + for (int32_t idx = 0; idx < pQuery->numOfOutputCols; ++idx) { + SColIndexEx *pColEx = &pQuery->pSelectExpr[idx].pBase.colInfo; + + // ts_comp column required the tag value for join filter + if (!TSDB_COL_IS_TAG(pColEx->flag)) { + continue; + } + + doSetTagValueInParam(pTagSchema, pColEx->colIdx, pMeterSidInfo, &pRuntimeEnv->pCtx[idx].tag); + } + + // set the join tag for first column + SSqlFuncExprMsg *pFuncMsg = &pQuery->pSelectExpr[0].pBase; + if (pFuncMsg->functionId == TSDB_FUNC_TS && pFuncMsg->colInfo.colIdx == PRIMARYKEY_TIMESTAMP_COL_INDEX && + pRuntimeEnv->pTSBuf != NULL) { + assert(pFuncMsg->numOfParams == 1); + doSetTagValueInParam(pTagSchema, pFuncMsg->arg->argValue.i64, pMeterSidInfo, &pRuntimeEnv->pCtx[0].tag); + } + } +} + +static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowResult *pWindowRes, bool mergeFlag) { + SQuery * pQuery = pRuntimeEnv->pQuery; + SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + if (!mergeFlag) { + pCtx[i].aOutputBuf = pCtx[i].aOutputBuf + pCtx[i].outputBytes; + pCtx[i].currentStage = FIRST_STAGE_MERGE; + + resetResultInfo(pCtx[i].resultInfo); + aAggs[functionId].init(&pCtx[i]); + } + + pCtx[i].hasNull = true; + pCtx[i].nStartQueryTimestamp = timestamp; + pCtx[i].aInputElemBuf = 
getPosInResultPage(pRuntimeEnv, i, pWindowRes); + // pCtx[i].aInputElemBuf = ((char *)inputSrc->data) + + // ((int32_t)pRuntimeEnv->offset[i] * pRuntimeEnv->numOfRowsPerPage) + + // pCtx[i].outputBytes * inputIdx; + + // in case of tag column, the tag information should be extracted from input buffer + if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) { + tVariantDestroy(&pCtx[i].tag); + tVariantCreateFromBinary(&pCtx[i].tag, pCtx[i].aInputElemBuf, pCtx[i].inputBytes, pCtx[i].inputType); + } + } + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + if (functionId == TSDB_FUNC_TAG_DUMMY) { + continue; + } + + aAggs[functionId].distMergeFunc(&pCtx[i]); + } +} + +static void printBinaryData(int32_t functionId, char *data, int32_t srcDataType) { + if (functionId == TSDB_FUNC_FIRST_DST || functionId == TSDB_FUNC_LAST_DST) { + switch (srcDataType) { + case TSDB_DATA_TYPE_BINARY: + printf("%" PRId64 ",%s\t", *(TSKEY *)data, (data + TSDB_KEYSIZE + 1)); + break; + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_BOOL: + printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int8_t *)(data + TSDB_KEYSIZE + 1)); + break; + case TSDB_DATA_TYPE_SMALLINT: + printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int16_t *)(data + TSDB_KEYSIZE + 1)); + break; + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TIMESTAMP: + printf("%" PRId64 ",%" PRId64 "\t", *(TSKEY *)data, *(TSKEY *)(data + TSDB_KEYSIZE + 1)); + break; + case TSDB_DATA_TYPE_INT: + printf("%" PRId64 ",%d\t", *(TSKEY *)data, *(int32_t *)(data + TSDB_KEYSIZE + 1)); + break; + case TSDB_DATA_TYPE_FLOAT: + printf("%" PRId64 ",%f\t", *(TSKEY *)data, *(float *)(data + TSDB_KEYSIZE + 1)); + break; + case TSDB_DATA_TYPE_DOUBLE: + printf("%" PRId64 ",%lf\t", *(TSKEY *)data, *(double *)(data + TSDB_KEYSIZE + 1)); + break; + } + } else if (functionId == TSDB_FUNC_AVG) { + printf("%lf,%d\t", *(double *)data, *(int32_t *)(data + sizeof(double))); + } 
else if (functionId == TSDB_FUNC_SPREAD) { + printf("%lf,%lf\t", *(double *)data, *(double *)(data + sizeof(double))); + } else if (functionId == TSDB_FUNC_TWA) { + data += 1; + printf("%lf,%" PRId64 ",%" PRId64 ",%" PRId64 "\t", *(double *)data, *(int64_t *)(data + 8), + *(int64_t *)(data + 16), *(int64_t *)(data + 24)); + } else if (functionId == TSDB_FUNC_MIN || functionId == TSDB_FUNC_MAX) { + switch (srcDataType) { + case TSDB_DATA_TYPE_TINYINT: + case TSDB_DATA_TYPE_BOOL: + printf("%d\t", *(int8_t *)data); + break; + case TSDB_DATA_TYPE_SMALLINT: + printf("%d\t", *(int16_t *)data); + break; + case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TIMESTAMP: + printf("%" PRId64 "\t", *(int64_t *)data); + break; + case TSDB_DATA_TYPE_INT: + printf("%d\t", *(int *)data); + break; + case TSDB_DATA_TYPE_FLOAT: + printf("%f\t", *(float *)data); + break; + case TSDB_DATA_TYPE_DOUBLE: + printf("%f\t", *(float *)data); + break; + } + } else if (functionId == TSDB_FUNC_SUM) { + if (srcDataType == TSDB_DATA_TYPE_FLOAT || srcDataType == TSDB_DATA_TYPE_DOUBLE) { + printf("%lf\t", *(float *)data); + } else { + printf("%" PRId64 "\t", *(int64_t *)data); + } + } else { + printf("%s\t", data); + } +} + +void UNUSED_FUNC displayInterResult(SData **pdata, SQuery *pQuery, int32_t numOfRows) { +#if 0 + int32_t numOfCols = pQuery->numOfOutputCols; + printf("super table query intermediate result, total:%d\n", numOfRows); + + SQInfo * pQInfo = (SQInfo *)(GET_QINFO_ADDR(pQuery)); + SMeterObj *pMeterObj = pQInfo->pObj; + + for (int32_t j = 0; j < numOfRows; ++j) { + for (int32_t i = 0; i < numOfCols; ++i) { + switch (pQuery->pSelectExpr[i].resType) { + case TSDB_DATA_TYPE_BINARY: { + int32_t colIdx = pQuery->pSelectExpr[i].pBase.colInfo.colIdx; + int32_t type = 0; + + if (TSDB_COL_IS_TAG(pQuery->pSelectExpr[i].pBase.colInfo.flag)) { + type = pQuery->pSelectExpr[i].resType; + } else { + type = pMeterObj->schema[colIdx].type; + } + printBinaryData(pQuery->pSelectExpr[i].pBase.functionId, 
pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j, + type); + break; + } + case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_BIGINT: + printf("%" PRId64 "\t", *(int64_t *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); + break; + case TSDB_DATA_TYPE_INT: + printf("%d\t", *(int32_t *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); + break; + case TSDB_DATA_TYPE_FLOAT: + printf("%f\t", *(float *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); + break; + case TSDB_DATA_TYPE_DOUBLE: + printf("%lf\t", *(double *)(pdata[i]->data + pQuery->pSelectExpr[i].resBytes * j)); + break; + } + } + printf("\n"); + } +#endif +} + +typedef struct SCompSupporter { + STableDataInfo **pTableDataInfo; + int32_t * position; + SQInfo * pQInfo; +} SCompSupporter; + +int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) { + int32_t left = *(int32_t *)pLeft; + int32_t right = *(int32_t *)pRight; + + SCompSupporter * supporter = (SCompSupporter *)param; + SQueryRuntimeEnv *pRuntimeEnv = &supporter->pQInfo->runtimeEnv; + + int32_t leftPos = supporter->position[left]; + int32_t rightPos = supporter->position[right]; + + /* left source is exhausted */ + if (leftPos == -1) { + return 1; + } + + /* right source is exhausted*/ + if (rightPos == -1) { + return -1; + } + + SWindowResInfo *pWindowResInfo1 = &supporter->pTableDataInfo[left]->pTableQInfo->windowResInfo; + SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos); + + char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1); + TSKEY leftTimestamp = GET_INT64_VAL(b1); + + SWindowResInfo *pWindowResInfo2 = &supporter->pTableDataInfo[right]->pTableQInfo->windowResInfo; + SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos); + + char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2); + TSKEY rightTimestamp = GET_INT64_VAL(b2); + + if (leftTimestamp == rightTimestamp) { + return 0; + } + + 
return leftTimestamp > rightTimestamp ? 1 : -1; +} + +int32_t mergeMetersResultToOneGroups(SQInfo *pQInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + int64_t st = taosGetTimestampMs(); + int32_t ret = TSDB_CODE_SUCCESS; + +// while (pQInfo->subgroupIdx < pQInfo->pSidSet->numOfSubSet) { +// int32_t start = pQInfo->pSidSet->starterPos[pQInfo->subgroupIdx]; +// int32_t end = pQInfo->pSidSet->starterPos[pQInfo->subgroupIdx + 1]; +// +// assert(0); +// // ret = doMergeMetersResultsToGroupRes(pQInfo, pQuery, pRuntimeEnv, pQInfo->pTableDataInfo, start, end); +// if (ret < 0) { // not enough disk space to save the data into disk +// return -1; +// } +// +// pQInfo->subgroupIdx += 1; +// +// // this group generates at least one result, return results +// if (ret > 0) { +// break; +// } +// +// assert(pQInfo->numOfGroupResultPages == 0); +// dTrace("QInfo:%p no result in group %d, continue", GET_QINFO_ADDR(pQuery), pQInfo->subgroupIdx - 1); +// } +// +// dTrace("QInfo:%p merge res data into group, index:%d, total group:%d, elapsed time:%lldms", GET_QINFO_ADDR(pQuery), +// pQInfo->subgroupIdx - 1, pQInfo->pSidSet->numOfSubSet, taosGetTimestampMs() - st); + + return TSDB_CODE_SUCCESS; +} + +void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) { + if (pQInfo->offset == pQInfo->numOfGroupResultPages) { + pQInfo->numOfGroupResultPages = 0; + + // current results of group has been sent to client, try next group + if (mergeMetersResultToOneGroups(pQInfo) != TSDB_CODE_SUCCESS) { + return; // failed to save data in the disk + } + + // set current query completed +// if (pQInfo->numOfGroupResultPages == 0 && pQInfo->subgroupIdx == pQInfo->pSidSet->numOfSubSet) { + // pQInfo->tableIndex = pQInfo->pSidSet->numOfTables; +// return; +// } + } + + SQueryRuntimeEnv * pRuntimeEnv = &pQInfo->runtimeEnv; + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + + int32_t id = getGroupResultId(pQInfo->subgroupIdx - 1); + 
SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id); + + int32_t total = 0; + for (int32_t i = 0; i < list.size; ++i) { + tFilePage *pData = getResultBufferPageById(pResultBuf, list.pData[i]); + total += pData->numOfElems; + } + + pQuery->sdata[0]->num = total; + + int32_t offset = 0; + for (int32_t num = 0; num < list.size; ++num) { + tFilePage *pData = getResultBufferPageById(pResultBuf, list.pData[num]); + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; + char * pDest = pQuery->sdata[i]->data; + + memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->numOfElems, + bytes * pData->numOfElems); + } + + offset += pData->numOfElems; + } + + assert(pQuery->rec.pointsRead == 0); + + pQuery->rec.pointsRead += pQuery->sdata[0]->num; + pQInfo->offset += 1; +} + +int64_t getNumOfResultWindowRes(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindowRes) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + int64_t maxOutput = 0; + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; + + /* + * ts, tag, tagprj function can not decide the output number of current query + * the number of output result is decided by main output + */ + if (functionId == TSDB_FUNC_TS || functionId == TSDB_FUNC_TAG || functionId == TSDB_FUNC_TAGPRJ) { + continue; + } + + SResultInfo *pResultInfo = &pWindowRes->resultInfo[j]; + if (pResultInfo != NULL && maxOutput < pResultInfo->numOfRes) { + maxOutput = pResultInfo->numOfRes; + } + } + + return maxOutput; +} + +int32_t doMergeMetersResultsToGroupRes(SQInfo *pQInfo, STableDataInfo *pTableDataInfo, int32_t start, int32_t end) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pQInfo->runtimeEnv.pQuery; + + tFilePage ** buffer = (tFilePage **)pQuery->sdata; + int32_t * posList = calloc((end - start), sizeof(int32_t)); + STableDataInfo **pTableList = 
malloc(POINTER_BYTES * (end - start)); + + // todo opt for the case of one table per group + int32_t numOfMeters = 0; + for (int32_t i = start; i < end; ++i) { + int32_t sid = pTableDataInfo[i].pTableQInfo->sid; + + SIDList list = getDataBufPagesIdList(pRuntimeEnv->pResultBuf, sid); + if (list.size > 0 && pTableDataInfo[i].pTableQInfo->windowResInfo.size > 0) { + pTableList[numOfMeters] = &pTableDataInfo[i]; + numOfMeters += 1; + } + } + + if (numOfMeters == 0) { + tfree(posList); + tfree(pTableList); + + assert(pQInfo->numOfGroupResultPages == 0); + return 0; + } + + SCompSupporter cs = {pTableList, posList, pQInfo}; + + SLoserTreeInfo *pTree = NULL; + tLoserTreeCreate(&pTree, numOfMeters, &cs, tableResultComparFn); + + SResultInfo *pResultInfo = calloc(pQuery->numOfOutputCols, sizeof(SResultInfo)); + setWindowResultInfo(pResultInfo, pQuery, pRuntimeEnv->stableQuery); + + resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo); + int64_t lastTimestamp = -1; + + int64_t startt = taosGetTimestampMs(); + + while (1) { + int32_t pos = pTree->pNode[0].index; + + SWindowResInfo *pWindowResInfo = &pTableList[pos]->pTableQInfo->windowResInfo; + SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]); + + char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes); + TSKEY ts = GET_INT64_VAL(b); + + assert(ts == pWindowRes->window.skey); + int64_t num = getNumOfResultWindowRes(pRuntimeEnv, pWindowRes); + if (num <= 0) { + cs.position[pos] += 1; + + if (cs.position[pos] >= pWindowResInfo->size) { + cs.position[pos] = -1; + + // all input sources are exhausted + if (--numOfMeters == 0) { + break; + } + } + } else { + if (ts == lastTimestamp) { // merge with the last one + doMerge(pRuntimeEnv, ts, pWindowRes, true); + } else { // copy data to disk buffer + assert(0); + // if (buffer[0]->numOfElems == pQuery->pointsToRead) { + // if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) { + // return -1; + // } + + // 
resetMergeResultBuf(pQuery, pRuntimeEnv->pCtx, pResultInfo); + // } + + doMerge(pRuntimeEnv, ts, pWindowRes, false); + buffer[0]->numOfElems += 1; + } + + lastTimestamp = ts; + + cs.position[pos] += 1; + if (cs.position[pos] >= pWindowResInfo->size) { + cs.position[pos] = -1; + + // all input sources are exhausted + if (--numOfMeters == 0) { + break; + } + } + } + + tLoserTreeAdjust(pTree, pos + pTree->numOfEntries); + } + + if (buffer[0]->numOfElems != 0) { // there are data in buffer + if (flushFromResultBuf(pQInfo) != TSDB_CODE_SUCCESS) { + // dError("QInfo:%p failed to flush data into temp file, abort query", GET_QINFO_ADDR(pQuery), + // pQInfo->extBufFile); + tfree(pTree); + tfree(pTableList); + tfree(posList); + tfree(pResultInfo); + + return -1; + } + } + + int64_t endt = taosGetTimestampMs(); + +#ifdef _DEBUG_VIEW + displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len); +#endif + + dTrace("QInfo:%p result merge completed, elapsed time:%" PRId64 " ms", GET_QINFO_ADDR(pQuery), endt - startt); + tfree(pTree); + tfree(pTableList); + tfree(posList); + + pQInfo->offset = 0; + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + tfree(pResultInfo[i].interResultBuf); + } + + tfree(pResultInfo); + return pQInfo->numOfGroupResultPages; +} + +int32_t flushFromResultBuf(SQInfo *pQInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + int32_t capacity = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / pQuery->rowSize; + + // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000. 
+ int32_t pageId = -1; + + int32_t remain = pQuery->sdata[0]->num; + int32_t offset = 0; + + while (remain > 0) { + int32_t r = remain; + if (r > capacity) { + r = capacity; + } + + int32_t id = getGroupResultId(pQInfo->subgroupIdx) + pQInfo->numOfGroupResultPages; + tFilePage *buf = getNewDataBuf(pResultBuf, id, &pageId); + + // pagewise copy to dest buffer + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; + buf->numOfElems = r; + + memcpy(buf->data + pRuntimeEnv->offset[i] * buf->numOfElems, ((char *)pQuery->sdata[i]->data) + offset * bytes, + buf->numOfElems * bytes); + } + + offset += r; + remain -= r; + } + + pQInfo->numOfGroupResultPages += 1; + return TSDB_CODE_SUCCESS; +} + +void resetMergeResultBuf(SQuery *pQuery, SQLFunctionCtx *pCtx, SResultInfo *pResultInfo) { + for (int32_t k = 0; k < pQuery->numOfOutputCols; ++k) { + pCtx[k].aOutputBuf = pQuery->sdata[k]->data - pCtx[k].outputBytes; + pCtx[k].size = 1; + pCtx[k].startOffset = 0; + pCtx[k].resultInfo = &pResultInfo[k]; + + pQuery->sdata[k]->num = 0; + } +} + +void setMeterDataInfo(STableDataInfo *pTableDataInfo, void *pMeterObj, int32_t meterIdx, int32_t groupId) { + pTableDataInfo->pMeterObj = pMeterObj; + pTableDataInfo->groupIdx = groupId; + pTableDataInfo->tableIndex = meterIdx; +} + +static void doDisableFunctsForSupplementaryScan(SQuery *pQuery, SWindowResInfo *pWindowResInfo, int32_t order) { + for (int32_t i = 0; i < pWindowResInfo->size; ++i) { + SWindowStatus *pStatus = getTimeWindowResStatus(pWindowResInfo, i); + if (!pStatus->closed) { + continue; + } + + SWindowResult *buf = getWindowResult(pWindowResInfo, i); + + // open/close the specified query for each group result + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int32_t functId = pQuery->pSelectExpr[j].pBase.functionId; + + if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSQL_SO_DESC) || + ((functId == TSDB_FUNC_LAST || functId == 
TSDB_FUNC_LAST_DST) && order == TSQL_SO_ASC)) { + buf->resultInfo[j].complete = false; + } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) { + buf->resultInfo[j].complete = true; + } + } + } +} + +void disableFunctForTableSuppleScan(SQueryRuntimeEnv *pRuntimeEnv, int32_t order) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + // group by normal columns and interval query on normal table + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + pRuntimeEnv->pCtx[i].order = (pRuntimeEnv->pCtx[i].order) ^ 1u; + } + + SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) { + doDisableFunctsForSupplementaryScan(pQuery, pWindowResInfo, order); + } else { // for simple result of table query, + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int32_t functId = pQuery->pSelectExpr[j].pBase.functionId; + SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[j]; + + if (((functId == TSDB_FUNC_FIRST || functId == TSDB_FUNC_FIRST_DST) && order == TSQL_SO_DESC) || + ((functId == TSDB_FUNC_LAST || functId == TSDB_FUNC_LAST_DST) && order == TSQL_SO_ASC)) { + pCtx->resultInfo->complete = false; + } else if (functId != TSDB_FUNC_TS && functId != TSDB_FUNC_TAG) { + pCtx->resultInfo->complete = true; + } + } + } + + pQuery->order.order = pQuery->order.order ^ 1u; +} + +void disableFunctForSuppleScan(SQInfo *pQInfo, int32_t order) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + pRuntimeEnv->pCtx[i].order = (pRuntimeEnv->pCtx[i].order) ^ 1u; + } + + if (isIntervalQuery(pQuery)) { + size_t numOfTables = taosArrayGetSize(pQInfo->pTableIdList); + + for (int32_t i = 0; i < numOfTables; ++i) { + STableQueryInfo *pTableQueryInfo = pQInfo->pTableDataInfo[i].pTableQInfo; + SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo; + + doDisableFunctsForSupplementaryScan(pQuery, 
pWindowResInfo, order); + } + } else { + SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + doDisableFunctsForSupplementaryScan(pQuery, pWindowResInfo, order); + } + + pQuery->order.order = (pQuery->order.order) ^ 1u; +} + +void enableFunctForMasterScan(SQueryRuntimeEnv *pRuntimeEnv, int32_t order) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + pRuntimeEnv->pCtx[i].order = (pRuntimeEnv->pCtx[i].order) ^ 1u; + } + + pQuery->order.order = (pQuery->order.order) ^ 1u; +} + +void createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, SPosInfo *posInfo) { + int32_t numOfCols = pQuery->numOfOutputCols; + + pResultRow->resultInfo = calloc((size_t)numOfCols, sizeof(SResultInfo)); + pResultRow->pos = *posInfo; + + // set the intermediate result output buffer + setWindowResultInfo(pResultRow->resultInfo, pQuery, isSTableQuery); +} + +void resetCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; + pCtx->aOutputBuf = pQuery->sdata[i]->data; + + /* + * set the output buffer information and intermediate buffer + * not all queries require the interResultBuf, such as COUNT/TAGPRJ/PRJ/TAG etc. 
+ */ + resetResultInfo(&pRuntimeEnv->resultInfo[i]); + pCtx->resultInfo = &pRuntimeEnv->resultInfo[i]; + + // set the timestamp output buffer for top/bottom/diff query + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { + pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf; + } + + memset(pQuery->sdata[i]->data, 0, (size_t)pQuery->pSelectExpr[i].resBytes * pQuery->capacity); + } + + initCtxOutputBuf(pRuntimeEnv); +} + +void forwardCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, int64_t output) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + // reset the execution contexts + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; + assert(functionId != TSDB_FUNC_DIFF); + + // set next output position + if (IS_OUTER_FORWARD(aAggs[functionId].nStatus)) { + pRuntimeEnv->pCtx[j].aOutputBuf += pRuntimeEnv->pCtx[j].outputBytes * output; + } + + if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) { + /* + * NOTE: for top/bottom query, the value of first column of output (timestamp) are assigned + * in the procedure of top/bottom routine + * the output buffer in top/bottom routine is ptsOutputBuf, so we need to forward the output buffer + * + * diff function is handled in multi-output function + */ + pRuntimeEnv->pCtx[j].ptsOutputBuf += TSDB_KEYSIZE * output; + } + + resetResultInfo(pRuntimeEnv->pCtx[j].resultInfo); + } +} + +void initCtxOutputBuf(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int32_t functionId = pQuery->pSelectExpr[j].pBase.functionId; + pRuntimeEnv->pCtx[j].currentStage = 0; + + aAggs[functionId].init(&pRuntimeEnv->pCtx[j]); + } +} + +void doSkipResults(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = pRuntimeEnv->pQuery; + if (pQuery->rec.pointsRead == 0 || pQuery->limit.offset 
== 0) { + return; + } + + if (pQuery->rec.pointsRead <= pQuery->limit.offset) { + pQuery->limit.offset -= pQuery->rec.pointsRead; + + pQuery->rec.pointsRead = 0; + // pQuery->pointsOffset = pQuery->rec.pointsToRead; // clear all data in result buffer + + resetCtxOutputBuf(pRuntimeEnv); + + // clear the buffer is full flag if exists + pQuery->status &= (~QUERY_RESBUF_FULL); + } else { + int32_t numOfSkip = (int32_t)pQuery->limit.offset; + pQuery->rec.pointsRead -= numOfSkip; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; + assert(0); + // memmove(pQuery->sdata[i]->data, pQuery->sdata[i]->data + bytes * numOfSkip, pQuery->pointsRead * bytes); + pRuntimeEnv->pCtx[i].aOutputBuf += bytes * numOfSkip; + + if (functionId == TSDB_FUNC_DIFF || functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM) { + pRuntimeEnv->pCtx[i].ptsOutputBuf += TSDB_KEYSIZE * numOfSkip; + } + } + + pQuery->limit.offset = 0; + } +} + +typedef struct SQueryStatus { + int8_t overStatus; + TSKEY lastKey; + STSCursor cur; +} SQueryStatus; + +// todo refactor +static void queryStatusSave(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatus *pStatus) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + pStatus->overStatus = pQuery->status; + pStatus->lastKey = pQuery->lastKey; + + pStatus->cur = tsBufGetCursor(pRuntimeEnv->pTSBuf); // save the cursor + + if (pRuntimeEnv->pTSBuf) { + pRuntimeEnv->pTSBuf->cur.order ^= 1u; + tsBufNextPos(pRuntimeEnv->pTSBuf); + } + + setQueryStatus(pQuery, QUERY_NOT_COMPLETED); + + SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY); + pQuery->lastKey = pQuery->window.skey; +} + +static void queryStatusRestore(SQueryRuntimeEnv *pRuntimeEnv, SQueryStatus *pStatus) { + SQuery *pQuery = pRuntimeEnv->pQuery; + SWAP(pQuery->window.skey, pQuery->window.ekey, TSKEY); + + pQuery->lastKey = pStatus->lastKey; + pQuery->status = pStatus->overStatus; + + 
tsBufSetCursor(pRuntimeEnv->pTSBuf, &pStatus->cur); +} + +static void doSingleMeterSupplementScan(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery * pQuery = pRuntimeEnv->pQuery; + SQueryStatus qStatus = {0}; + + if (!needSupplementaryScan(pQuery)) { + return; + } + + dTrace("QInfo:%p start to supp scan", GET_QINFO_ADDR(pQuery)); + SET_SUPPLEMENT_SCAN_FLAG(pRuntimeEnv); + + // close necessary function execution during supplementary scan + disableFunctForTableSuppleScan(pRuntimeEnv, pQuery->order.order); + queryStatusSave(pRuntimeEnv, &qStatus); + + STimeWindow w = {.skey = pQuery->window.skey, .ekey = pQuery->window.ekey}; + + // reverse scan from current position + tsdbpos_t current = tsdbDataBlockTell(pRuntimeEnv->pQueryHandle); + tsdbResetQuery(pRuntimeEnv->pQueryHandle, &w, current, pQuery->order.order); + + doScanAllDataBlocks(pRuntimeEnv); + + queryStatusRestore(pRuntimeEnv, &qStatus); + enableFunctForMasterScan(pRuntimeEnv, pQuery->order.order); + SET_MASTER_SCAN_FLAG(pRuntimeEnv); +} + +void setQueryStatus(SQuery *pQuery, int8_t status) { + if (status == QUERY_NOT_COMPLETED) { + pQuery->status = status; + } else { + // QUERY_NOT_COMPLETED is not compatible with any other status, so clear its position first + pQuery->status &= (~QUERY_NOT_COMPLETED); + pQuery->status |= status; + } +} + +bool needScanDataBlocksAgain(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = pRuntimeEnv->pQuery; + bool toContinue = false; + + if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) { + // for each group result, call the finalize function for each column + SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + + for (int32_t i = 0; i < pWindowResInfo->size; ++i) { + SWindowResult *pResult = getWindowResult(pWindowResInfo, i); + if (!pResult->status.closed) { + continue; + } + + setWindowResOutputBuf(pRuntimeEnv, pResult); + + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int16_t functId = pQuery->pSelectExpr[j].pBase.functionId; + if 
/*
 * Run the complete scan of one table: the master scan (repeated as long as any function asks for another
 * pass), followed by the supplementary scan over the range that the master scan has covered.
 *
 * Returns early, with the status set to QUERY_NO_DATA_TO_CHECK, when the query is found to be killed
 * between two passes.
 *
 * On normal return lastKey and window.ekey hold the values they had after the master scan, window.skey
 * holds the lastKey observed on entry, and the query handle has been reset to that window and moved to its
 * next data block.
 */
void vnodeScanAllData(SQueryRuntimeEnv *pRuntimeEnv) {
  SQuery *pQuery = pRuntimeEnv->pQuery;
  setQueryStatus(pQuery, QUERY_NOT_COMPLETED);

  // store the start query position
  void *pos = tsdbDataBlockTell(pRuntimeEnv->pQueryHandle);

  // state captured before the first pass: start key, status and the active window slot
  int64_t skey = pQuery->lastKey;
  int32_t status = pQuery->status;
  int32_t activeSlot = pRuntimeEnv->windowResInfo.curIndex;

  SET_MASTER_SCAN_FLAG(pRuntimeEnv);
  int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order);

  while (1) {
    doScanAllDataBlocks(pRuntimeEnv);

    // no function requires another pass over the data: the master scan is done
    if (!needScanDataBlocksAgain(pRuntimeEnv)) {
      // restore the status
      if (pRuntimeEnv->scanFlag == REPEAT_SCAN) {
        pQuery->status = status;
      }

      break;
    }

    /*
     * set the correct start position, and load the corresponding block in buffer for next
     * round scan all data blocks.
     */
    // NOTE(review): the result of the seek is stored but never inspected
    int32_t ret = tsdbDataBlockSeek(pRuntimeEnv->pQueryHandle, pos);

    // remember the status reached by this pass, then rewind the window slot for the repeated scan
    status = pQuery->status;
    pRuntimeEnv->windowResInfo.curIndex = activeSlot;

    setQueryStatus(pQuery, QUERY_NOT_COMPLETED);
    pRuntimeEnv->scanFlag = REPEAT_SCAN;

    /* check if query is killed or not */
    if (isQueryKilled(pQuery)) {
      setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK);
      return;
    }
  }

  // no need to set the end key
  // keep the values produced by the master scan, they are restored after the supplementary scan
  TSKEY lkey = pQuery->lastKey;
  TSKEY ekey = pQuery->window.ekey;

  // the supplementary scan only covers [skey, lastKey - step], i.e. what the master scan has visited
  pQuery->window.skey = skey;
  pQuery->window.ekey = pQuery->lastKey - step;
  tsdbpos_t current = tsdbDataBlockTell(pRuntimeEnv->pQueryHandle);

  doSingleMeterSupplementScan(pRuntimeEnv);

  // update the pQuery->window.skey and pQuery->window.ekey to limit the scan scope of sliding query during
  // supplementary scan
  pQuery->lastKey = lkey;
  pQuery->window.ekey = ekey;

  // continue from the position reached by the master scan
  STimeWindow win = {.skey = pQuery->window.skey, .ekey = pQuery->window.ekey};
  tsdbResetQuery(pRuntimeEnv->pQueryHandle, &win, current, pQuery->order.order);
  tsdbNextDataBlock(pRuntimeEnv->pQueryHandle);
}
*/ + buf->numOfRows = getNumOfResult(pRuntimeEnv); + } + + } else { + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + aAggs[pQuery->pSelectExpr[j].pBase.functionId].xFinalize(&pRuntimeEnv->pCtx[j]); + } + } +} + +static bool hasMainOutput(SQuery *pQuery) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + + if (functionId != TSDB_FUNC_TS && functionId != TSDB_FUNC_TAG && functionId != TSDB_FUNC_TAGPRJ) { + return true; + } + } + + return false; +} + +STableQueryInfo *createMeterQueryInfo(SQInfo *pQInfo, int32_t sid, TSKEY skey, TSKEY ekey) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + + STableQueryInfo *pTableQueryInfo = calloc(1, sizeof(STableQueryInfo)); + + pTableQueryInfo->win = (STimeWindow){ + .skey = skey, + .ekey = ekey, + }; + pTableQueryInfo->lastKey = skey; + + pTableQueryInfo->sid = sid; + pTableQueryInfo->cur.vnodeIndex = -1; + + initWindowResInfo(&pTableQueryInfo->windowResInfo, pRuntimeEnv, 100, 100, TSDB_DATA_TYPE_INT); + return pTableQueryInfo; +} + +void destroyMeterQueryInfo(STableQueryInfo *pTableQueryInfo, int32_t numOfCols) { + if (pTableQueryInfo == NULL) { + return; + } + + cleanupTimeWindowInfo(&pTableQueryInfo->windowResInfo, numOfCols); + free(pTableQueryInfo); +} + +void changeMeterQueryInfoForSuppleQuery(SQuery *pQuery, STableQueryInfo *pTableQueryInfo, TSKEY skey, TSKEY ekey) { + if (pTableQueryInfo == NULL) { + return; + } + + // order has change already! 
+ int32_t step = GET_FORWARD_DIRECTION_FACTOR(pQuery->order.order); + if (!QUERY_IS_ASC_QUERY(pQuery)) { + assert(pTableQueryInfo->win.ekey >= pTableQueryInfo->lastKey + step); + } else { + assert(pTableQueryInfo->win.ekey <= pTableQueryInfo->lastKey + step); + } + + pTableQueryInfo->win.ekey = pTableQueryInfo->lastKey + step; + + SWAP(pTableQueryInfo->win.skey, pTableQueryInfo->win.ekey, TSKEY); + pTableQueryInfo->lastKey = pTableQueryInfo->win.skey; + + pTableQueryInfo->cur.order = pTableQueryInfo->cur.order ^ 1u; + pTableQueryInfo->cur.vnodeIndex = -1; +} + +void restoreIntervalQueryRange(SQueryRuntimeEnv *pRuntimeEnv, STableQueryInfo *pTableQueryInfo) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + pQuery->window.skey = pTableQueryInfo->win.skey; + pQuery->window.ekey = pTableQueryInfo->win.ekey; + pQuery->lastKey = pTableQueryInfo->lastKey; + + assert(((pQuery->lastKey >= pQuery->window.skey) && QUERY_IS_ASC_QUERY(pQuery)) || + ((pQuery->lastKey <= pQuery->window.skey) && !QUERY_IS_ASC_QUERY(pQuery))); +} + +/** + * set output buffer for different group + * @param pRuntimeEnv + * @param pDataBlockInfoEx + */ +void setExecutionContext(SQInfo *pQInfo, STableQueryInfo *pTableQueryInfo, int32_t meterIdx, int32_t groupIdx, + TSKEY nextKey) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SWindowResInfo * pWindowResInfo = &pRuntimeEnv->windowResInfo; + int32_t GROUPRESULTID = 1; + + SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&groupIdx, sizeof(groupIdx)); + if (pWindowRes == NULL) { + return; + } + + /* + * not assign result buffer yet, add new result buffer + * all group belong to one result set, and each group result has different group id so set the id to be one + */ + if (pWindowRes->pos.pageId == -1) { + if (addNewWindowResultBuf(pWindowRes, pRuntimeEnv->pResultBuf, GROUPRESULTID, pRuntimeEnv->numOfRowsPerPage) != + TSDB_CODE_SUCCESS) { + return; + } + } + + setWindowResOutputBuf(pRuntimeEnv, pWindowRes); + 
initCtxOutputBuf(pRuntimeEnv); + + pTableQueryInfo->lastKey = nextKey; + setAdditionalInfo(pQInfo, meterIdx, pTableQueryInfo); +} + +static void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + // Note: pResult->pos[i]->numOfElems == 0, there is only fixed number of results for each group + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; + pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); + + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { + pCtx->ptsOutputBuf = pRuntimeEnv->pCtx[0].aOutputBuf; + } + + /* + * set the output buffer information and intermediate buffer + * not all queries require the interResultBuf, such as COUNT + */ + pCtx->resultInfo = &pResult->resultInfo[i]; + + // set super table query flag + SResultInfo *pResInfo = GET_RES_INFO(pCtx); + pResInfo->superTableQ = pRuntimeEnv->stableQuery; + } +} + +int32_t setAdditionalInfo(SQInfo *pQInfo, int32_t meterIdx, STableQueryInfo *pTableQueryInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + assert(pTableQueryInfo->lastKey > 0); + + // vnodeSetTagValueInParam(pQInfo->pSidSet, pRuntimeEnv, pQInfo->pMeterSidExtInfo[meterIdx]); + + // both the master and supplement scan needs to set the correct ts comp start position + if (pRuntimeEnv->pTSBuf != NULL) { + if (pTableQueryInfo->cur.vnodeIndex == -1) { + pTableQueryInfo->tag = pRuntimeEnv->pCtx[0].tag.i64Key; + + tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, pTableQueryInfo->tag); + + // keep the cursor info of current meter + pTableQueryInfo->cur = pRuntimeEnv->pTSBuf->cur; + } else { + tsBufSetCursor(pRuntimeEnv->pTSBuf, &pTableQueryInfo->cur); + } + } + + return 0; +} + +/* + * There are two cases to handle: + * + * 1. Query range is not set yet (queryRangeSet = 0). 
we need to set the query range info, including pQuery->lastKey, + * pQuery->window.skey, and pQuery->eKey. + * 2. Query range is set and query is in progress. There may be another result with the same query ranges to be + * merged during merge stage. In this case, we need the pTableQueryInfo->lastResRows to decide if there + * is a previous result generated or not. + */ +void setIntervalQueryRange(STableQueryInfo *pTableQueryInfo, SQInfo *pQInfo, TSKEY key) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + if (pTableQueryInfo->queryRangeSet) { + pQuery->lastKey = key; + pTableQueryInfo->lastKey = key; + } else { + pQuery->window.skey = key; + STimeWindow win = {.skey = key, pQuery->window.ekey}; + + // for too small query range, no data in this interval. + if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey < pQuery->window.skey)) || + (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey < pQuery->window.ekey))) { + return; + } + + /** + * In handling the both ascending and descending order super table query, we need to find the first qualified + * timestamp of this table, and then set the first qualified start timestamp. + * In ascending query, key is the first qualified timestamp. However, in the descending order query, additional + * operations involve. 
+ */ + TSKEY skey1, ekey1; + STimeWindow w = {0}; + SWindowResInfo *pWindowResInfo = &pTableQueryInfo->windowResInfo; + + doGetAlignedIntervalQueryRangeImpl(pQuery, win.skey, win.skey, win.ekey, &skey1, &ekey1, &w); + pWindowResInfo->startTime = pQuery->window.skey; // windowSKey may be 0 in case of 1970 timestamp + + if (pWindowResInfo->prevSKey == 0) { + if (QUERY_IS_ASC_QUERY(pQuery)) { + pWindowResInfo->prevSKey = w.skey; + } else { + assert(win.ekey == pQuery->window.skey); + pWindowResInfo->prevSKey = w.skey; + } + } + + pTableQueryInfo->queryRangeSet = 1; + pTableQueryInfo->lastKey = pQuery->window.skey; + pTableQueryInfo->win.skey = pQuery->window.skey; + + pQuery->lastKey = pQuery->window.skey; + } +} + +bool requireTimestamp(SQuery *pQuery) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; i++) { + int32_t functionId = pQuery->pSelectExpr[i].pBase.functionId; + if ((aAggs[functionId].nStatus & TSDB_FUNCSTATE_NEED_TS) != 0) { + return true; + } + } + return false; +} + +bool needPrimaryTimestampCol(SQuery *pQuery, SDataBlockInfo *pDataBlockInfo) { + /* + * 1. if skey or ekey locates in this block, we need to load the timestamp column to decide the precise position + * 2. 
if there are top/bottom, first_dst/last_dst functions, we need to load timestamp column in any cases; + */ + STimeWindow *w = &pDataBlockInfo->window; + bool loadPrimaryTS = (pQuery->lastKey >= w->skey && pQuery->lastKey <= w->ekey) || + (pQuery->window.ekey >= w->skey && pQuery->window.ekey <= w->ekey) || requireTimestamp(pQuery); + + return loadPrimaryTS; +} + +bool onDemandLoadDatablock(SQuery *pQuery, int16_t queryRangeSet) { + return (pQuery->intervalTime == 0) || ((queryRangeSet == 1) && (isIntervalQuery(pQuery))); +} + +static int32_t getNumOfSubset(SQInfo *pQInfo) { + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + + int32_t totalSubset = 0; + if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || (isIntervalQuery(pQuery))) { + totalSubset = numOfClosedTimeWindow(&pQInfo->runtimeEnv.windowResInfo); + } else { +// totalSubset = pQInfo->pSidSet->numOfSubSet; + } + + return totalSubset; +} + +static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResult *result, int32_t orderType) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + int32_t numOfResult = 0; + int32_t startIdx = 0; + int32_t step = -1; + + dTrace("QInfo:%p start to copy data from windowResInfo to pQuery buf", GET_QINFO_ADDR(pQuery)); + int32_t totalSubset = getNumOfSubset(pQInfo); + + if (orderType == TSQL_SO_ASC) { + startIdx = pQInfo->subgroupIdx; + step = 1; + } else { // desc order copy all data + startIdx = totalSubset - pQInfo->subgroupIdx - 1; + step = -1; + } + + for (int32_t i = startIdx; (i < totalSubset) && (i >= 0); i += step) { + if (result[i].numOfRows == 0) { + pQInfo->offset = 0; + pQInfo->subgroupIdx += 1; + continue; + } + + assert(result[i].numOfRows >= 0 && pQInfo->offset <= 1); + + int32_t numOfRowsToCopy = result[i].numOfRows - pQInfo->offset; + int32_t oldOffset = pQInfo->offset; + assert(0); + + /* + * current output space is not enough to keep all the result data of this group, only copy partial results + * to SQuery object's result 
buffer + */ + // if (numOfRowsToCopy > pQuery->pointsToRead - numOfResult) { + // numOfRowsToCopy = pQuery->pointsToRead - numOfResult; + // pQInfo->offset += numOfRowsToCopy; + // } else { + // pQInfo->offset = 0; + // pQInfo->subgroupIdx += 1; + // } + + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + int32_t size = pRuntimeEnv->pCtx[j].outputBytes; + + char *out = pQuery->sdata[j]->data + numOfResult * size; + char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]); + memcpy(out, in + oldOffset * size, size * numOfRowsToCopy); + } + + numOfResult += numOfRowsToCopy; + assert(0); + // if (numOfResult == pQuery->rec.pointsToRead) { + // break; + // } + } + + dTrace("QInfo:%p copy data to SQuery buf completed", GET_QINFO_ADDR(pQuery)); + +#ifdef _DEBUG_VIEW + displayInterResult(pQuery->sdata, pQuery, numOfResult); +#endif + return numOfResult; +} + +/** + * copyFromWindowResToSData support copy data in ascending/descending order + * For interval query of both super table and table, copy the data in ascending order, since the output results are + * ordered in SWindowResutl already. While handling the group by query for both table and super table, + * all group result are completed already. + * + * @param pQInfo + * @param result + */ +void copyFromWindowResToSData(SQInfo *pQInfo, SWindowResult *result) { + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + + int32_t orderType = (pQuery->pGroupbyExpr != NULL) ? pQuery->pGroupbyExpr->orderType : TSQL_SO_ASC; + int32_t numOfResult = doCopyToSData(pQInfo, result, orderType); + + pQuery->rec.pointsRead += numOfResult; + // assert(pQuery->rec.pointsRead <= pQuery->pointsToRead); +} + +static void updateWindowResNumOfRes(SQueryRuntimeEnv *pRuntimeEnv, STableDataInfo *pTableDataInfo) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + // update the number of result for each, only update the number of rows for the corresponding window result. 
+ if (pQuery->intervalTime == 0) { + int32_t g = pTableDataInfo->groupIdx; + assert(pRuntimeEnv->windowResInfo.size > 0); + + SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, (char *)&g, sizeof(g)); + if (pWindowRes->numOfRows == 0) { + pWindowRes->numOfRows = getNumOfResult(pRuntimeEnv); + } + } +} + +void stableApplyFunctionsOnBlock_(SQInfo *pQInfo, STableDataInfo *pTableDataInfo, SDataBlockInfo *pDataBlockInfo, + SDataStatis *pStatis, SArray *pDataBlock, __block_search_fn_t searchFn) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + STableQueryInfo * pTableQueryInfo = pTableDataInfo->pTableQInfo; + SWindowResInfo * pWindowResInfo = &pTableQueryInfo->windowResInfo; + + int32_t numOfRes = 0; + if (pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) { + // numOfRes = rowwiseApplyAllFunctions(pRuntimeEnv, &forwardStep, pFields, pDataBlockInfo, pWindowResInfo); + } else { + numOfRes = blockwiseApplyAllFunctions(pRuntimeEnv, pStatis, pDataBlockInfo, pWindowResInfo, searchFn, pDataBlock); + } + + assert(numOfRes >= 0); + + updateWindowResNumOfRes(pRuntimeEnv, pTableDataInfo); + updatelastkey(pQuery, pTableQueryInfo); +} + +// we need to split the refstatsult into different packages. 
+int32_t vnodeGetResultSize(void *thandle, int32_t *numOfRows) { + SQInfo *pQInfo = (SQInfo *)thandle; + SQuery *pQuery = &pQInfo->runtimeEnv.pQuery; + + /* + * get the file size and set the numOfRows to be the file size, since for tsComp query, + * the returned row size is equalled to 1 + * + * TODO handle the case that the file is too large to send back one time + */ + if (isTSCompQuery(pQuery) && (*numOfRows) > 0) { + struct stat fstat; + if (stat(pQuery->sdata[0]->data, &fstat) == 0) { + *numOfRows = fstat.st_size; + return fstat.st_size; + } else { + dError("QInfo:%p failed to get file info, path:%s, reason:%s", pQInfo, pQuery->sdata[0]->data, strerror(errno)); + return 0; + } + } else { + return pQuery->rowSize * (*numOfRows); + } +} + +int64_t vnodeGetOffsetVal(void *thandle) { + SQInfo *pQInfo = (SQInfo *)thandle; + return pQInfo->runtimeEnv.pQuery->limit.offset; +} + +bool vnodeHasRemainResults(void *handle) { + SQInfo *pQInfo = (SQInfo *)handle; + + if (pQInfo == NULL || pQInfo->runtimeEnv.pQuery->interpoType == TSDB_INTERPO_NONE) { + return false; + } + + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + SInterpolationInfo *pInterpoInfo = &pRuntimeEnv->interpoInfo; + if (pQuery->limit.limit > 0 && pQInfo->rec.pointsRead >= pQuery->limit.limit) { + return false; + } + + int32_t remain = taosNumOfRemainPoints(pInterpoInfo); + if (remain > 0) { + return true; + } else { + if (pRuntimeEnv->pInterpoBuf == NULL) { + return false; + } + + // query has completed + if (Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { + TSKEY ekey = taosGetRevisedEndKey(pQuery->window.ekey, pQuery->order.order, pQuery->intervalTime, + pQuery->slidingTimeUnit, pQuery->precision); + // int32_t numOfTotal = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY + // *)pRuntimeEnv->pInterpoBuf[0]->data, + // remain, pQuery->intervalTime, ekey, + // pQuery->pointsToRead); + // return numOfTotal > 0; + assert(0); + 
return false; + } + + return false; + } +} + +static int32_t resultInterpolate(SQInfo *pQInfo, tFilePage **data, tFilePage **pDataSrc, int32_t numOfRows, + int32_t outputRows) { +#if 0 + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery *pQuery = &pRuntimeEnv->pQuery; + + assert(pRuntimeEnv->pCtx[0].outputBytes == TSDB_KEYSIZE); + + // build support structure for performing interpolation + SSchema *pSchema = calloc(1, sizeof(SSchema) * pQuery->numOfOutputCols); + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + pSchema[i].bytes = pRuntimeEnv->pCtx[i].outputBytes; + pSchema[i].type = pQuery->pSelectExpr[i].resType; + } + +// SColumnModel *pModel = createColumnModel(pSchema, pQuery->numOfOutputCols, pQuery->pointsToRead); + + char * srcData[TSDB_MAX_COLUMNS] = {0}; + int32_t functions[TSDB_MAX_COLUMNS] = {0}; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + srcData[i] = pDataSrc[i]->data; + functions[i] = pQuery->pSelectExpr[i].pBase.functionId; + } + + assert(0); +// int32_t numOfRes = taosDoInterpoResult(&pRuntimeEnv->interpoInfo, pQuery->interpoType, data, numOfRows, outputRows, +// pQuery->intervalTime, (int64_t *)pDataSrc[0]->data, pModel, srcData, +// pQuery->defaultVal, functions, pRuntimeEnv->pTabObj->pointsPerFileBlock); + + destroyColumnModel(pModel); + free(pSchema); +#endif + return 0; +} + +static void doCopyQueryResultToMsg(SQInfo *pQInfo, int32_t numOfRows, char *data) { +#if 0 + SMeterObj *pObj = pQInfo->pObj; + SQuery * pQuery = &pQInfo->query; + + int tnumOfRows = vnodeList[pObj->vnode].cfg.rowsInFileBlock; + + // for metric query, bufIndex always be 0. + for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) { // pQInfo->bufIndex == 0 + int32_t bytes = pQuery->pSelectExpr[col].resBytes; + + memmove(data, pQuery->sdata[col]->data, bytes * numOfRows); + data += bytes * numOfRows; + } +#endif +} + +/** + * Copy the result data/file to output message buffer. 
+ * If the result is in file format, read file from disk and copy to output buffer, compression is not involved since + * data in file is already compressed. + * In case of other result in buffer, compress the result before copy once the tsComressMsg is set. + * + * @param handle + * @param data + * @param numOfRows the number of rows that are not returned in current retrieve + * @return + */ +int32_t vnodeCopyQueryResultToMsg(void *handle, char *data, int32_t numOfRows) { + SQInfo *pQInfo = (SQInfo *)handle; + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + + assert(pQuery->pSelectExpr != NULL && pQuery->numOfOutputCols > 0); + + // load data from file to msg buffer + if (isTSCompQuery(pQuery)) { + int32_t fd = open(pQuery->sdata[0]->data, O_RDONLY, 0666); + + // make sure file exist + if (FD_VALID(fd)) { + size_t s = lseek(fd, 0, SEEK_END); + dTrace("QInfo:%p ts comp data return, file:%s, size:%zu", pQInfo, pQuery->sdata[0]->data, s); + + lseek(fd, 0, SEEK_SET); + read(fd, data, s); + close(fd); + + unlink(pQuery->sdata[0]->data); + } else { + dError("QInfo:%p failed to open tmp file to send ts-comp data to client, path:%s, reason:%s", pQInfo, + pQuery->sdata[0]->data, strerror(errno)); + } + } else { + doCopyQueryResultToMsg(pQInfo, numOfRows, data); + } + + return numOfRows; +} + +int32_t vnodeQueryResultInterpolate(SQInfo *pQInfo, tFilePage **pDst, tFilePage **pDataSrc, int32_t numOfRows, + int32_t *numOfInterpo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; +#if 0 + while (1) { + numOfRows = taosNumOfRemainPoints(&pRuntimeEnv->interpoInfo); + + TSKEY ekey = taosGetRevisedEndKey(pQuery->window.skey, pQuery->order.order, pQuery->intervalTime, + pQuery->slidingTimeUnit, pQuery->precision); + int32_t numOfFinalRows = taosGetNumOfResultWithInterpo(&pRuntimeEnv->interpoInfo, (TSKEY *)pDataSrc[0]->data, + numOfRows, pQuery->intervalTime, ekey, pQuery->pointsToRead); + + int32_t ret = resultInterpolate(pQInfo, pDst, 
pDataSrc, numOfRows, numOfFinalRows); + assert(ret == numOfFinalRows); + + /* reached the start position of according to offset value, return immediately */ + if (pQuery->limit.offset == 0) { + return ret; + } + + if (pQuery->limit.offset < ret) { + ret -= pQuery->limit.offset; + // todo !!!!there exactly number of interpo is not valid. + // todo refactor move to the beginning of buffer + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + memmove(pDst[i]->data, pDst[i]->data + pQuery->pSelectExpr[i].resBytes * pQuery->limit.offset, + ret * pQuery->pSelectExpr[i].resBytes); + } + pQuery->limit.offset = 0; + return ret; + } else { + pQuery->limit.offset -= ret; + ret = 0; + } + + if (!vnodeHasRemainResults(pQInfo)) { + return ret; + } + } +#endif +} + +void vnodePrintQueryStatistics(SQInfo *pQInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + + SQuery *pQuery = pRuntimeEnv->pQuery; +#if 0 + SQueryCostSummary *pSummary = &pRuntimeEnv->summary; + if (pRuntimeEnv->pResultBuf == NULL) { + pSummary->tmpBufferInDisk = 0; + } else { + pSummary->tmpBufferInDisk = getResBufSize(pRuntimeEnv->pResultBuf); + } + + dTrace("QInfo:%p statis: comp blocks:%d, size:%d Bytes, elapsed time:%.2f ms", pQInfo, pSummary->readCompInfo, + pSummary->totalCompInfoSize, pSummary->loadCompInfoUs / 1000.0); + + dTrace("QInfo:%p statis: field info: %d, size:%d Bytes, avg size:%.2f Bytes, elapsed time:%.2f ms", pQInfo, + pSummary->readField, pSummary->totalFieldSize, (double)pSummary->totalFieldSize / pSummary->readField, + pSummary->loadFieldUs / 1000.0); + + dTrace( + "QInfo:%p statis: file blocks:%d, size:%d Bytes, elapsed time:%.2f ms, skipped:%d, in-memory gen null:%d Bytes", + pQInfo, pSummary->readDiskBlocks, pSummary->totalBlockSize, pSummary->loadBlocksUs / 1000.0, + pSummary->skippedFileBlocks, pSummary->totalGenData); + + dTrace("QInfo:%p statis: cache blocks:%d", pQInfo, pSummary->blocksInCache, 0); + dTrace("QInfo:%p statis: temp file:%d Bytes", pQInfo, 
pSummary->tmpBufferInDisk); + + dTrace("QInfo:%p statis: file:%d, table:%d", pQInfo, pSummary->numOfFiles, pSummary->numOfTables); + dTrace("QInfo:%p statis: seek ops:%d", pQInfo, pSummary->numOfSeek); + + double total = pSummary->fileTimeUs + pSummary->cacheTimeUs; + double io = pSummary->loadCompInfoUs + pSummary->loadBlocksUs + pSummary->loadFieldUs; + + // todo add the intermediate result save cost!! + double computing = total - io; + + dTrace( + "QInfo:%p statis: total elapsed time:%.2f ms, file:%.2f ms(%.2f%), cache:%.2f ms(%.2f%). io:%.2f ms(%.2f%)," + "comput:%.2fms(%.2f%)", + pQInfo, total / 1000.0, pSummary->fileTimeUs / 1000.0, pSummary->fileTimeUs * 100 / total, + pSummary->cacheTimeUs / 1000.0, pSummary->cacheTimeUs * 100 / total, io / 1000.0, io * 100 / total, + computing / 1000.0, computing * 100 / total); +#endif +} + +int32_t vnodeQueryTablePrepare(SQInfo *pQInfo, void *param) { + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + int32_t code = TSDB_CODE_SUCCESS; + + // only the successful complete requries the sem_post/over = 1 operations. 
+ if ((QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.skey > pQuery->window.ekey)) || + (!QUERY_IS_ASC_QUERY(pQuery) && (pQuery->window.ekey > pQuery->window.skey))) { + dTrace("QInfo:%p no result in time range %" PRId64 "-%" PRId64 ", order %d", pQInfo, pQuery->window.skey, + pQuery->window.ekey, pQuery->order.order); + + sem_post(&pQInfo->dataReady); + pQInfo->killed = 1; + return TSDB_CODE_SUCCESS; + } + + setScanLimitationByResultBuffer(pQuery); + changeExecuteScanOrder(pQuery, false); + + pQInfo->rec = (SResultRec){0}; + + // dataInCache requires lastKey value + pQuery->lastKey = pQuery->window.skey; + + STsdbQueryCond cond = {0}; + + cond.twindow = (STimeWindow) {.skey = pQuery->window.skey, .ekey = pQuery->window.ekey}; + cond.order = pQuery->order.order; + cond.colList = *pQuery->colList; + + SArray *cols = taosArrayInit(pQuery->numOfCols, sizeof(pQuery->colList[0])); + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { + taosArrayPush(cols, &pQuery->colList[i]); + } + + pQInfo->runtimeEnv.pQueryHandle = tsdbQueryByTableId(&cond, pQInfo->pTableIdList, cols); + + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + pRuntimeEnv->pQuery = pQuery; + + pRuntimeEnv->pTSBuf = param; + pRuntimeEnv->cur.vnodeIndex = -1; + if (param != NULL) { + int16_t order = (pQuery->order.order == pRuntimeEnv->pTSBuf->tsOrder) ? 
TSQL_SO_ASC : TSQL_SO_DESC; + tsBufSetTraverseOrder(pRuntimeEnv->pTSBuf, order); + } + + // create runtime environment + code = setupQueryRuntimeEnv(&pQInfo->runtimeEnv, NULL, pQuery->order.order, false); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + pRuntimeEnv->numOfRowsPerPage = getNumOfRowsInResultPage(pQuery, false); + if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || isIntervalQuery(pQuery)) { + int32_t rows = getInitialPageNum(pQInfo); + + code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rows, pQuery->rowSize); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + int16_t type = TSDB_DATA_TYPE_NULL; + if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { + type = getGroupbyColumnType(pQuery, pQuery->pGroupbyExpr); + } else { + type = TSDB_DATA_TYPE_TIMESTAMP; + } + + initWindowResInfo(&pRuntimeEnv->windowResInfo, pRuntimeEnv, rows, 4096, type); + } + + setQueryStatus(pQuery, QUERY_NOT_COMPLETED); + + SPointInterpoSupporter interpInfo = {0}; + pointInterpSupporterInit(pQuery, &interpInfo); + + /* + * in case of last_row query without query range, we set the query timestamp to + * pMeterObj->lastKey. Otherwise, keep the initial query time range unchanged. 
+ */ + if (isFirstLastRowQuery(pQuery) && notHasQueryTimeRange(pQuery)) { + if (!normalizeUnBoundLastRowQuery(pQInfo, &interpInfo)) { + sem_post(&pQInfo->dataReady); + pointInterpSupporterDestroy(&interpInfo); + return TSDB_CODE_SUCCESS; + } + } + + /* + * here we set the value for before and after the specified time into the + * parameter for interpolation query + */ + pointInterpSupporterSetData(pQInfo, &interpInfo); + pointInterpSupporterDestroy(&interpInfo); + + // todo move to other location + // if (!forwardQueryStartPosIfNeeded(pQInfo, pQInfo, dataInDisk, dataInCache)) { + // return TSDB_CODE_SUCCESS; + // } + + int64_t rs = taosGetIntervalStartTimestamp(pQuery->window.skey, pQuery->intervalTime, pQuery->slidingTimeUnit, + pQuery->precision); + taosInitInterpoInfo(&pRuntimeEnv->interpoInfo, pQuery->order.order, rs, 0, 0); + // allocMemForInterpo(pQInfo, pQuery, pMeterObj); + + if (!isPointInterpoQuery(pQuery)) { + // assert(pQuery->pos >= 0 && pQuery->slot >= 0); + } + + // the pQuery->window.skey is changed during normalizedFirstQueryRange, so set the newest lastkey value + pQuery->lastKey = pQuery->window.skey; + pRuntimeEnv->stableQuery = false; + + return TSDB_CODE_SUCCESS; +} + +static bool isGroupbyEachTable(SSqlGroupbyExpr *pGroupbyExpr, tSidSet *pSidset) { + if (pGroupbyExpr == NULL || pGroupbyExpr->numOfGroupCols == 0) { + return false; + } + + for (int32_t i = 0; i < pGroupbyExpr->numOfGroupCols; ++i) { + SColIndexEx *pColIndex = &pGroupbyExpr->columnInfo[i]; + if (pColIndex->flag == TSDB_COL_TAG) { + // assert(pSidset->numOfTables == pSidset->numOfSubSet); + return true; + } + } + + return false; +} + +static bool doCheckWithPrevQueryRange(SQuery *pQuery, TSKEY nextKey) { + if ((nextKey > pQuery->window.ekey && QUERY_IS_ASC_QUERY(pQuery)) || + (nextKey < pQuery->window.ekey && !QUERY_IS_ASC_QUERY(pQuery))) { + return false; + } + + return true; +} + +static void enableExecutionForNextTable(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = 
pRuntimeEnv->pQuery; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SResultInfo *pResInfo = GET_RES_INFO(&pRuntimeEnv->pCtx[i]); + if (pResInfo != NULL) { + pResInfo->complete = false; + } + } +} + +static void queryOnDataBlocks(SQInfo *pQInfo, STableDataInfo *pMeterDataInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + // SMeterObj * pTempMeter = getMeterObj(pSupporter->pMetersHashTable, pSupporter->pMeterSidExtInfo[0]->sid); + // __block_search_fn_t searchFn = vnodeSearchKeyFunc[pTempMeter->searchAlgorithm]; + + // dTrace("QInfo:%p start to check data blocks in %d files", pQInfo, pVnodeFileInfo->numOfFiles); + + tsdb_query_handle_t *pQueryHandle = pRuntimeEnv->pQueryHandle; + while (tsdbNextDataBlock(pQueryHandle)) { + if (isQueryKilled(pQuery)) { + break; + } + + // prepare the STableDataInfo struct for each table + + SDataBlockInfo blockInfo = tsdbRetrieveDataBlockInfo(pQueryHandle); + // SMeterObj * pMeterObj = getMeterObj(pSupporter->pMetersHashTable, blockInfo.sid); + + // pQInfo->pObj = pMeterObj; + // pRuntimeEnv->pMeterObj = pMeterObj; + + STableDataInfo *pTableDataInfo = NULL; + // for (int32_t i = 0; i < pSupporter->pSidSet->numOfTables; ++i) { + // if (pMeterDataInfo[i].pMeterObj == pMeterObj) { + // pTableDataInfo = &pMeterDataInfo[i]; + // break; + // } + // } + + assert(pTableDataInfo != NULL); + STableQueryInfo *pTableQueryInfo = pTableDataInfo->pTableQInfo; + + if (pTableDataInfo->pTableQInfo == NULL) { + // pTableDataInfo->pTableQInfo = createMeterQueryInfo(pQInfo, pMeterObj->sid, pQuery->skey, pQuery->ekey); + } + + restoreIntervalQueryRange(pRuntimeEnv, pTableQueryInfo); + + SDataStatis *pStatis = NULL; + SArray * pDataBlock = loadDataBlockOnDemand(pRuntimeEnv, &blockInfo, &pStatis); + + TSKEY nextKey = blockInfo.window.ekey; + if (pQuery->intervalTime == 0) { + setExecutionContext(pQInfo, pTableQueryInfo, pTableDataInfo->tableIndex, pTableDataInfo->groupIdx, nextKey); + } 
else { // interval query + setIntervalQueryRange(pTableQueryInfo, pQInfo, nextKey); + int32_t ret = setAdditionalInfo(pQInfo, pTableDataInfo->tableIndex, pTableQueryInfo); + if (ret != TSDB_CODE_SUCCESS) { + // pQInfo->killed = 1; + return; + } + } + + // stableApplyFunctionsOnBlock_(pSupporter, pTableDataInfo, &blockInfo, pStatis, pDataBlock, searchFn); + } +} + +static bool multimeterMultioutputHelper(SQInfo *pQInfo, bool *dataInDisk, bool *dataInCache, int32_t index, + int32_t start) { + // STableIdInfo **pMeterSidExtInfo = pQInfo->pMeterSidExtInfo; + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = &pRuntimeEnv->pQuery; +#if 0 + setQueryStatus(pQuery, QUERY_NOT_COMPLETED); + + SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[index]->sid); + if (pMeterObj == NULL) { + dError("QInfo:%p do not find required meter id: %d, all meterObjs id is:", pQInfo, pMeterSidExtInfo[index]->sid); + return false; + } + + vnodeSetTagValueInParam(pSupporter->pSidSet, pRuntimeEnv, pMeterSidExtInfo[index]); + + dTrace("QInfo:%p query on (%d): vid:%d sid:%d meterId:%s, qrange:%" PRId64 "-%" PRId64, pQInfo, index - start, + pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey); + + pQInfo->pObj = pMeterObj; + pQuery->lastKey = pQuery->skey; + pRuntimeEnv->pMeterObj = pMeterObj; + + vnodeUpdateQueryColumnIndex(pQuery, pRuntimeEnv->pMeterObj); + vnodeUpdateFilterColumnIndex(pQuery); + + vnodeCheckIfDataExists(pRuntimeEnv, pMeterObj, dataInDisk, dataInCache); + + // data in file or cache is not qualified for the query. 
abort + if (!(dataInCache || dataInDisk)) { + dTrace("QInfo:%p vid:%d sid:%d meterId:%s, qrange:%" PRId64 "-%" PRId64 ", nores, %p", pQInfo, pMeterObj->vnode, + pMeterObj->sid, pMeterObj->meterId, pQuery->skey, pQuery->ekey, pQuery); + return false; + } + + if (pRuntimeEnv->pTSBuf != NULL) { + if (pRuntimeEnv->cur.vnodeIndex == -1) { + int64_t tag = pRuntimeEnv->pCtx[0].tag.i64Key; + STSElem elem = tsBufGetElemStartPos(pRuntimeEnv->pTSBuf, 0, tag); + + // failed to find data with the specified tag value + if (elem.vnode < 0) { + return false; + } + } else { + tsBufSetCursor(pRuntimeEnv->pTSBuf, &pRuntimeEnv->cur); + } + } + +#endif + + initCtxOutputBuf(pRuntimeEnv); + return true; +} + +static int64_t doCheckMetersInGroup(SQInfo *pQInfo, int32_t index, int32_t start) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + bool dataInDisk = true; + bool dataInCache = true; + if (!multimeterMultioutputHelper(pQInfo, &dataInDisk, &dataInCache, index, start)) { + return 0; + } + + SPointInterpoSupporter pointInterpSupporter = {0}; + pointInterpSupporterInit(pQuery, &pointInterpSupporter); + assert(0); + + // if (!normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &pointInterpSupporter, NULL)) { + // pointInterpSupporterDestroy(&pointInterpSupporter); + // return 0; + // } + + /* + * here we set the value for before and after the specified time into the + * parameter for interpolation query + */ + pointInterpSupporterSetData(pQInfo, &pointInterpSupporter); + pointInterpSupporterDestroy(&pointInterpSupporter); + + vnodeScanAllData(pRuntimeEnv); + + // first/last_row query, do not invoke the finalize for super table query + doFinalizeResult(pRuntimeEnv); + + int64_t numOfRes = getNumOfResult(pRuntimeEnv); + assert(numOfRes == 1 || numOfRes == 0); + + // accumulate the point interpolation result + if (numOfRes > 0) { + pQuery->rec.pointsRead += numOfRes; + forwardCtxOutputBuf(pRuntimeEnv, numOfRes); + } + + return 
numOfRes; +} + +/** + * super table query handler + * 1. super table projection query, group-by on normal columns query, ts-comp query + * 2. point interpolation query, last row query + * + * @param pQInfo + */ +static void vnodeSTableSeqProcessor(SQInfo *pQInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + +#if 0 + SQuery* pQuery = pRuntimeEnv->pQuery; +// tSidSet *pTableIdList = pSupporter->pSidSet; + + int32_t vid = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[0]->sid)->vnode; + + if (isPointInterpoQuery(pQuery)) { + resetCtxOutputBuf(pRuntimeEnv); + + assert(pQuery->limit.offset == 0 && pQuery->limit.limit != 0); + + while (pSupporter->subgroupIdx < pTableIdList->numOfSubSet) { + int32_t start = pTableIdList->starterPos[pSupporter->subgroupIdx]; + int32_t end = pTableIdList->starterPos[pSupporter->subgroupIdx + 1] - 1; + + if (isFirstLastRowQuery(pQuery)) { + dTrace("QInfo:%p last_row query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pTableIdList->numOfSubSet, + pSupporter->subgroupIdx); + + TSKEY key = -1; + int32_t index = -1; + + // choose the last key for one group + pSupporter->meterIdx = start; + + for (int32_t k = start; k <= end; ++k, pSupporter->meterIdx++) { + if (isQueryKilled(pQuery)) { + setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); + return; + } + + // get the last key of meters that belongs to this group + SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[k]->sid); + if (pMeterObj != NULL) { + if (key < pMeterObj->lastKey) { + key = pMeterObj->lastKey; + index = k; + } + } + } + + pQuery->skey = key; + pQuery->ekey = key; + pSupporter->rawSKey = key; + pSupporter->rawEKey = key; + + int64_t num = doCheckMetersInGroup(pQInfo, index, start); + assert(num >= 0); + } else { + dTrace("QInfo:%p interp query on vid:%d, numOfGroups:%d, current group:%d", pQInfo, vid, pTableIdList->numOfSubSet, + pSupporter->subgroupIdx); + + for (int32_t k = start; k <= end; ++k) { + if 
(isQueryKilled(pQuery)) { + setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); + return; + } + + pQuery->skey = pSupporter->rawSKey; + pQuery->ekey = pSupporter->rawEKey; + + int64_t num = doCheckMetersInGroup(pQInfo, k, start); + if (num == 1) { + break; + } + } + } + + pSupporter->subgroupIdx++; + + // output buffer is full, return to client + if (pQuery->pointsRead >= pQuery->pointsToRead) { + break; + } + } + } else { + /* + * 1. super table projection query, 2. group-by on normal columns query, 3. ts-comp query + */ + assert(pSupporter->meterIdx >= 0); + + /* + * if the subgroup index is larger than 0, results generated by group by tbname,k is existed. + * we need to return it to client in the first place. + */ + if (pSupporter->subgroupIdx > 0) { + copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); + pQInfo->pointsRead += pQuery->pointsRead; + + if (pQuery->pointsRead > 0) { + return; + } + } + + if (pSupporter->meterIdx >= pTableIdList->numOfTables) { + return; + } + + resetCtxOutputBuf(pRuntimeEnv); + resetTimeWindowInfo(pRuntimeEnv, &pRuntimeEnv->windowResInfo); + + while (pSupporter->meterIdx < pSupporter->numOfMeters) { + int32_t k = pSupporter->meterIdx; + + if (isQueryKilled(pQuery)) { + setQueryStatus(pQuery, QUERY_NO_DATA_TO_CHECK); + return; + } + + TSKEY skey = pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key; + if (skey > 0) { + pQuery->skey = skey; + } + + bool dataInDisk = true; + bool dataInCache = true; + if (!multimeterMultioutputHelper(pQInfo, &dataInDisk, &dataInCache, k, 0)) { + pQuery->skey = pSupporter->rawSKey; + pQuery->ekey = pSupporter->rawEKey; + + pSupporter->meterIdx++; + continue; + } + +#if DEFAULT_IO_ENGINE == IO_ENGINE_MMAP + for (int32_t i = 0; i < pRuntimeEnv->numOfFiles; ++i) { + resetMMapWindow(&pRuntimeEnv->pVnodeFiles[i]); + } +#endif + + SPointInterpoSupporter pointInterpSupporter = {0}; + assert(0); +// if (normalizedFirstQueryRange(dataInDisk, dataInCache, pSupporter, &pointInterpSupporter, NULL) 
== false) { +// pQuery->skey = pSupporter->rawSKey; +// pQuery->ekey = pSupporter->rawEKey; +// +// pSupporter->meterIdx++; +// continue; +// } + + // TODO handle the limit problem + if (pQuery->numOfFilterCols == 0 && pQuery->limit.offset > 0) { + forwardQueryStartPosition(pRuntimeEnv); + + if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { + pQuery->skey = pSupporter->rawSKey; + pQuery->ekey = pSupporter->rawEKey; + + pSupporter->meterIdx++; + continue; + } + } + + vnodeScanAllData(pRuntimeEnv); + + pQuery->pointsRead = getNumOfResult(pRuntimeEnv); + doSkipResults(pRuntimeEnv); + + // the limitation of output result is reached, set the query completed + if (doRevisedResultsByLimit(pQInfo)) { + pSupporter->meterIdx = pSupporter->pSidSet->numOfTables; + break; + } + + // enable execution for next table, when handling the projection query + enableExecutionForNextTable(pRuntimeEnv); + + if (Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { + /* + * query range is identical in terms of all meters involved in query, + * so we need to restore them at the *beginning* of query on each meter, + * not the consecutive query on meter on which is aborted due to buffer limitation + * to ensure that, we can reset the query range once query on a meter is completed. 
+ */ + pQuery->skey = pSupporter->rawSKey; + pQuery->ekey = pSupporter->rawEKey; + pSupporter->meterIdx++; + + pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key = pQuery->lastKey; + + // if the buffer is full or group by each table, we need to jump out of the loop + if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL) || + isGroupbyEachTable(pQuery->pGroupbyExpr, pSupporter->pSidSet)) { + break; + } + + } else { // forward query range + pQuery->skey = pQuery->lastKey; + + // all data in the result buffer are skipped due to the offset, continue to retrieve data from current meter + if (pQuery->pointsRead == 0) { + assert(!Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)); + continue; + } else { + pQInfo->pTableQuerySupporter->pMeterSidExtInfo[k]->key = pQuery->lastKey; + // buffer is full, wait for the next round to retrieve data from current meter + assert(Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)); + break; + } + } + } + } + + /* + * 1. super table projection query, group-by on normal columns query, ts-comp query + * 2. point interpolation query, last row query + * + * group-by on normal columns query and last_row query do NOT invoke the finalizer here, + * since the finalize stage will be done at the client side. + * + * projection query, point interpolation query do not need the finalizer. + * + * Only the ts-comp query requires the finalizer function to be executed here. 
+ */ + if (isTSCompQuery(pQuery)) { + doFinalizeResult(pRuntimeEnv); + } + + if (pRuntimeEnv->pTSBuf != NULL) { + pRuntimeEnv->cur = pRuntimeEnv->pTSBuf->cur; + } + + // todo refactor + if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { + SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + + for (int32_t i = 0; i < pWindowResInfo->size; ++i) { + SWindowStatus *pStatus = &pWindowResInfo->pResult[i].status; + pStatus->closed = true; // enable return all results for group by normal columns + + SWindowResult *pResult = &pWindowResInfo->pResult[i]; + for (int32_t j = 0; j < pQuery->numOfOutputCols; ++j) { + pResult->numOfRows = MAX(pResult->numOfRows, pResult->resultInfo[j].numOfRes); + } + } + + pQInfo->pTableQuerySupporter->subgroupIdx = 0; + pQuery->pointsRead = 0; + copyFromWindowResToSData(pQInfo, pWindowResInfo->pResult); + } + + pQInfo->pointsRead += pQuery->pointsRead; + pQuery->pointsOffset = pQuery->pointsToRead; + + dTrace( + "QInfo %p vid:%d, numOfMeters:%d, index:%d, numOfGroups:%d, %d points returned, totalRead:%d totalReturn:%d," + "next skey:%" PRId64 ", offset:%" PRId64, + pQInfo, vid, pTableIdList->numOfTables, pSupporter->meterIdx, pTableIdList->numOfSubSet, pQuery->pointsRead, pQInfo->pointsRead, + pQInfo->pointsReturned, pQuery->skey, pQuery->limit.offset); +#endif +} + +static void doOrderedScan(SQInfo *pQInfo) { + SQuery *pQuery = &pQInfo->runtimeEnv.pQuery; +#if 0 +// if (pQInfo->runtimeEnv. 
== NULL) { +// pSupporter->pMeterDataInfo = calloc(pSupporter->pSidSet->numOfTables, sizeof(STableDataInfo)); +// } + + STableIdInfo **pMeterSidExtInfo = pSupporter->pMeterSidExtInfo; + + tSidSet* pSidset = pSupporter->pSidSet; + int32_t groupId = 0; + + for (int32_t i = 0; i < pSidset->numOfTables; ++i) { // load all meter meta info + SMeterObj *pMeterObj = getMeterObj(pSupporter->pMetersHashTable, pMeterSidExtInfo[i]->sid); + if (pMeterObj == NULL) { + dError("QInfo:%p failed to find required sid:%d", pQInfo, pMeterSidExtInfo[i]->sid); + continue; + } + + if (i >= pSidset->starterPos[groupId + 1]) { + groupId += 1; + } + + STableDataInfo *pOneMeterDataInfo = &pSupporter->pMeterDataInfo[i]; + assert(pOneMeterDataInfo->pMeterObj == NULL); + + setMeterDataInfo(pOneMeterDataInfo, pMeterObj, i, groupId); + pOneMeterDataInfo->pTableQInfo = createMeterQueryInfo(pSupporter, pMeterObj->sid, pQuery->skey, pQuery->ekey); + } + + queryOnDataBlocks(pQInfo, pSupporter->pMeterDataInfo); + if (pQInfo->code != TSDB_CODE_SUCCESS) { + return; + } +#endif +} + +static void setupMeterQueryInfoForSupplementQuery(SQInfo *pQInfo) { + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + + int32_t num = taosHashGetSize(pQInfo->pTableIdList); + for (int32_t i = 0; i < num; ++i) { + // STableQueryInfo *pTableQueryInfo = pSupporter->pMeterDataInfo[i].pTableQInfo; + // changeMeterQueryInfoForSuppleQuery(pQuery, pTableQueryInfo, pSupporter->rawSKey, pSupporter->rawEKey); + } +} + +static void doMultiMeterSupplementaryScan(SQInfo *pQInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + if (!needSupplementaryScan(pQuery)) { + dTrace("QInfo:%p no need to do supplementary scan, query completed", pQInfo); + return; + } + + SET_SUPPLEMENT_SCAN_FLAG(pRuntimeEnv); + // disableFunctForSuppleScan(pSupporter, pQuery->order.order); + + if (pRuntimeEnv->pTSBuf != NULL) { + pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1u; + } + +#if 0 + 
SWAP(pSupporter->rawSKey, pSupporter->rawEKey, TSKEY); + setupMeterQueryInfoForSupplementQuery(pSupporter); + + int64_t st = taosGetTimestampMs(); + + doOrderedScan(pQInfo); + + int64_t et = taosGetTimestampMs(); + dTrace("QInfo:%p supplementary scan completed, elapsed time: %lldms", pQInfo, et - st); + + /* + * restore the env + * the meter query info is not reset to the original state + */ + SWAP(pSupporter->rawSKey, pSupporter->rawEKey, TSKEY); + enableFunctForMasterScan(pRuntimeEnv, pQuery->order.order); + + if (pRuntimeEnv->pTSBuf != NULL) { + pRuntimeEnv->pTSBuf->cur.order = pRuntimeEnv->pTSBuf->cur.order ^ 1; + } +#endif + SET_MASTER_SCAN_FLAG(pRuntimeEnv); +} + +static void vnodeMultiMeterQueryProcessor(SQInfo *pQInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + if (pQInfo->subgroupIdx > 0) { + /* + * if the subgroupIdx > 0, the query process must be completed yet, we only need to + * copy the data into output buffer + */ + if (pQuery->intervalTime > 0) { + copyResToQueryResultBuf(pQInfo, pQuery); + +#ifdef _DEBUG_VIEW + displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len); +#endif + } else { + copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); + } + + pQInfo->rec.pointsRead += pQuery->rec.pointsRead; + + if (pQuery->rec.pointsRead == 0) { + // vnodePrintQueryStatistics(pSupporter); + } + + dTrace("QInfo:%p points returned:%d, totalRead:%d totalReturn:%d", pQInfo, pQuery->rec.pointsRead, + pQInfo->rec.pointsRead, pQInfo->pointsReturned); + return; + } +#if 0 + pSupporter->pMeterDataInfo = (STableDataInfo *)calloc(1, sizeof(STableDataInfo) * pSupporter->numOfMeters); + if (pSupporter->pMeterDataInfo == NULL) { + dError("QInfo:%p failed to allocate memory, %s", pQInfo, strerror(errno)); + pQInfo->code = -TSDB_CODE_SERV_OUT_OF_MEMORY; + return; + } + + dTrace("QInfo:%p query start, qrange:%" PRId64 "-%" PRId64 ", order:%d, group:%d", pQInfo, pSupporter->rawSKey, + 
pSupporter->rawEKey, pQuery->order.order, pSupporter->pSidSet->numOfSubSet); + + dTrace("QInfo:%p main query scan start", pQInfo); + int64_t st = taosGetTimestampMs(); + doOrderedScan(pQInfo); + + int64_t et = taosGetTimestampMs(); + dTrace("QInfo:%p main scan completed, elapsed time: %lldms, supplementary scan start, order:%d", pQInfo, et - st, + pQuery->order.order ^ 1u); + + if (pQuery->intervalTime > 0) { + for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { + STableQueryInfo *pTableQueryInfo = pSupporter->pMeterDataInfo[i].pTableQInfo; + closeAllTimeWindow(&pTableQueryInfo->windowResInfo); + } + } else { // close results for group result + closeAllTimeWindow(&pRuntimeEnv->windowResInfo); + } + + doMultiMeterSupplementaryScan(pQInfo); + + if (isQueryKilled(pQuery)) { + dTrace("QInfo:%p query killed, abort", pQInfo); + return; + } + + if (pQuery->intervalTime > 0 || isSumAvgRateQuery(pQuery)) { + assert(pSupporter->subgroupIdx == 0 && pSupporter->numOfGroupResultPages == 0); + + if (mergeMetersResultToOneGroups(pSupporter) == TSDB_CODE_SUCCESS) { + copyResToQueryResultBuf(pSupporter, pQuery); + +#ifdef _DEBUG_VIEW + displayInterResult(pQuery->sdata, pQuery, pQuery->sdata[0]->len); +#endif + } + } else { // not a interval query + copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); + } + + // handle the limitation of output buffer + pQInfo->pointsRead += pQuery->pointsRead; + dTrace("QInfo:%p points returned:%d, totalRead:%d totalReturn:%d", pQInfo, pQuery->pointsRead, pQInfo->pointsRead, + pQInfo->pointsReturned); +#endif +} + +/* + * in each query, this function will be called only once, no retry for further result. 
+ * + * select count(*)/top(field,k)/avg(field name) from table_name [where ts>now-1a]; + * select count(*) from table_name group by status_column; + */ +static void tableFixedOutputProcessor(SQInfo *pQInfo) { + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery * pQuery = pRuntimeEnv->pQuery; + + vnodeScanAllData(pRuntimeEnv); + doFinalizeResult(pRuntimeEnv); + + if (isQueryKilled(pQuery)) { + return; + } + + // since the numOfOutputElems must be identical for all sql functions that are allowed to be executed simutanelously. + pQuery->rec.pointsRead = getNumOfResult(pRuntimeEnv); + // assert(pQuery->pointsRead <= pQuery->pointsToRead && + // Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)); + + // must be top/bottom query if offset > 0 + if (pQuery->limit.offset > 0) { + assert(isTopBottomQuery(pQuery)); + } + + doSkipResults(pRuntimeEnv); + doRevisedResultsByLimit(pQInfo); + + pQInfo->rec.pointsRead = pQuery->rec.pointsRead; +} + +static void tableMultiOutputProcessor(SQInfo *pQInfo) { +#if 0 + SQuery * pQuery = &pQInfo->query; + SMeterObj *pMeterObj = pQInfo->pObj; + + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->pTableQuerySupporter->runtimeEnv; + + // for ts_comp query, re-initialized is not allowed + if (!isTSCompQuery(pQuery)) { + resetCtxOutputBuf(pRuntimeEnv); + } + + while (1) { + vnodeScanAllData(pRuntimeEnv); + doFinalizeResult(pRuntimeEnv); + + if (isQueryKilled(pQuery)) { + return; + } + + pQuery->pointsRead = getNumOfResult(pRuntimeEnv); + if (pQuery->limit.offset > 0 && pQuery->numOfFilterCols > 0 && pQuery->pointsRead > 0) { + doSkipResults(pRuntimeEnv); + } + + /* + * 1. if pQuery->pointsRead == 0, pQuery->limit.offset >= 0, still need to check data + * 2. 
if pQuery->pointsRead > 0, pQuery->limit.offset must be 0 + */ + if (pQuery->pointsRead > 0 || Q_STATUS_EQUAL(pQuery->over, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { + break; + } + + TSKEY nextTimestamp = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); + assert(nextTimestamp > 0 || ((nextTimestamp < 0) && Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK))); + + dTrace("QInfo:%p vid:%d sid:%d id:%s, skip current result, offset:%" PRId64 ", next qrange:%" PRId64 "-%" PRId64, + pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->limit.offset, pQuery->lastKey, + pQuery->ekey); + + resetCtxOutputBuf(pRuntimeEnv); + } + + doRevisedResultsByLimit(pQInfo); + pQInfo->pointsRead += pQuery->pointsRead; + + if (Q_STATUS_EQUAL(pQuery->over, QUERY_RESBUF_FULL)) { + TSKEY nextTimestamp = loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); + assert(nextTimestamp > 0 || ((nextTimestamp < 0) && Q_STATUS_EQUAL(pQuery->over, QUERY_NO_DATA_TO_CHECK))); + + dTrace("QInfo:%p vid:%d sid:%d id:%s, query abort due to buffer limitation, next qrange:%" PRId64 "-%" PRId64, + pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->lastKey, pQuery->ekey); + } + + dTrace("QInfo:%p vid:%d sid:%d id:%s, %d points returned, totalRead:%d totalReturn:%d", pQInfo, pMeterObj->vnode, + pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, pQInfo->pointsRead, pQInfo->pointsReturned); + + pQuery->pointsOffset = pQuery->pointsToRead; // restore the available buffer + if (!isTSCompQuery(pQuery)) { + assert(pQuery->pointsRead <= pQuery->pointsToRead); + } + +#endif +} + +static void vnodeSingleMeterIntervalMainLooper(SQueryRuntimeEnv *pRuntimeEnv) { + SQuery *pQuery = pRuntimeEnv->pQuery; + + while (1) { + initCtxOutputBuf(pRuntimeEnv); + vnodeScanAllData(pRuntimeEnv); + + if (isQueryKilled(pQuery)) { + return; + } + + assert(!Q_STATUS_EQUAL(pQuery->status, QUERY_NOT_COMPLETED)); + doFinalizeResult(pRuntimeEnv); + + // here we can ignore the 
records in case of no interpolation + // todo handle offset, in case of top/bottom interval query + if ((pQuery->numOfFilterCols > 0 || pRuntimeEnv->pTSBuf != NULL) && pQuery->limit.offset > 0 && + pQuery->interpoType == TSDB_INTERPO_NONE) { + // maxOutput <= 0, means current query does not generate any results + int32_t numOfClosed = numOfClosedTimeWindow(&pRuntimeEnv->windowResInfo); + + int32_t c = MIN(numOfClosed, pQuery->limit.offset); + clearFirstNTimeWindow(pRuntimeEnv, c); + pQuery->limit.offset -= c; + } + + if (Q_STATUS_EQUAL(pQuery->status, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { + break; + } + + // load the data block for the next retrieve + // loadRequiredBlockIntoMem(pRuntimeEnv, &pRuntimeEnv->nextPos); + if (Q_STATUS_EQUAL(pQuery->status, QUERY_RESBUF_FULL)) { + break; + } + } +} + +/* handle time interval query on single table */ +static void tableIntervalProcessor(SQInfo *pQInfo) { + // STable *pMeterObj = pQInfo->pObj; + + SQueryRuntimeEnv *pRuntimeEnv = &(pQInfo->runtimeEnv); + SQuery * pQuery = pRuntimeEnv->pQuery; + + int32_t numOfInterpo = 0; + + while (1) { + resetCtxOutputBuf(pRuntimeEnv); + vnodeSingleMeterIntervalMainLooper(pRuntimeEnv); + + if (pQuery->intervalTime > 0) { + pQInfo->subgroupIdx = 0; // always start from 0 + pQuery->rec.pointsRead = 0; + copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); + + clearFirstNTimeWindow(pRuntimeEnv, pQInfo->subgroupIdx); + } + + // the offset is handled at prepare stage if no interpolation involved + if (pQuery->interpoType == TSDB_INTERPO_NONE) { + doRevisedResultsByLimit(pQInfo); + break; + } else { + taosInterpoSetStartInfo(&pRuntimeEnv->interpoInfo, pQuery->rec.pointsRead, pQuery->interpoType); + SData **pInterpoBuf = pRuntimeEnv->pInterpoBuf; + + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + memcpy(pInterpoBuf[i]->data, pQuery->sdata[i]->data, pQuery->rec.pointsRead * pQuery->pSelectExpr[i].resBytes); + } + + numOfInterpo = 0; + pQuery->rec.pointsRead = 
vnodeQueryResultInterpolate( + pQInfo, (tFilePage **)pQuery->sdata, (tFilePage **)pInterpoBuf, pQuery->rec.pointsRead, &numOfInterpo); + + dTrace("QInfo: %p interpo completed, final:%d", pQInfo, pQuery->rec.pointsRead); + if (pQuery->rec.pointsRead > 0 || Q_STATUS_EQUAL(pQuery->status, QUERY_COMPLETED | QUERY_NO_DATA_TO_CHECK)) { + doRevisedResultsByLimit(pQInfo); + break; + } + + // no result generated yet, continue retrieve data + pQuery->rec.pointsRead = 0; + } + } + + // all data scanned, the group by normal column can return + if (isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // todo refactor with merge interval time result + pQInfo->subgroupIdx = 0; + pQuery->rec.pointsRead = 0; + copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); + clearFirstNTimeWindow(pRuntimeEnv, pQInfo->subgroupIdx); + } + + pQInfo->rec.pointsRead += pQuery->rec.pointsRead; + pQInfo->pointsInterpo += numOfInterpo; + + // dTrace("%p vid:%d sid:%d id:%s, %d points returned %d points interpo, totalRead:%d totalInterpo:%d + // totalReturn:%d", + // pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, numOfInterpo, + // pQInfo->pointsRead - pQInfo->pointsInterpo, pQInfo->pointsInterpo, pQInfo->pointsReturned); +} + +void qTableQuery(SQInfo* pQInfo) { + assert(pQInfo != NULL); + + if (pQInfo->killed) { + dTrace("QInfo:%p it is already killed, abort", pQInfo); + return; + } + + SQueryRuntimeEnv *pRuntimeEnv = &pQInfo->runtimeEnv; + SQuery *pQuery = pRuntimeEnv->pQuery; + +// dTrace("vid:%d sid:%d id:%s, query thread is created, numOfQueries:%d, QInfo:%p", pQInfo); + + if (vnodeHasRemainResults(pQInfo)) { + /* + * There are remain results that are not returned due to result interpolation + * So, we do keep in this procedure instead of launching retrieve procedure for next results. 
+ */ + int32_t numOfInterpo = 0; + + int32_t remain = taosNumOfRemainPoints(&pRuntimeEnv->interpoInfo); + pQuery->rec.pointsRead = vnodeQueryResultInterpolate(pQInfo, (tFilePage **)pQuery->sdata, + (tFilePage **)pRuntimeEnv->pInterpoBuf, remain, &numOfInterpo); + + doRevisedResultsByLimit(pQInfo); + + pQInfo->pointsInterpo += numOfInterpo; + pQInfo->rec.pointsRead += pQuery->rec.pointsRead; + +// dTrace("QInfo:%p %d points returned %d points interpo, totalRead:%d totalInterpo:%d totalReturn:%d", +// pQInfo, pQuery->pointsRead, numOfInterpo, pQInfo->pointsRead, pQInfo->pointsInterpo, pQInfo->pointsReturned); + sem_post(&pQInfo->dataReady); + return; + } + + // here we have scan all qualified data in both data file and cache + if (Q_STATUS_EQUAL(pQuery->status, QUERY_NO_DATA_TO_CHECK | QUERY_COMPLETED)) { + // continue to get push data from the group result + if (isGroupbyNormalCol(pQuery->pGroupbyExpr) || + (pQuery->intervalTime > 0 && pQInfo->pointsReturned < pQuery->limit.limit)) { + // todo limit the output for interval query? 
+ pQuery->rec.pointsRead = 0; + pQInfo->subgroupIdx = 0; // always start from 0 + + if (pRuntimeEnv->windowResInfo.size > 0) { + copyFromWindowResToSData(pQInfo, pRuntimeEnv->windowResInfo.pResult); + pQInfo->rec.pointsRead += pQuery->rec.pointsRead; + + clearFirstNTimeWindow(pRuntimeEnv, pQInfo->subgroupIdx); + + if (pQuery->rec.pointsRead > 0) { +// dTrace("QInfo:%p vid:%d sid:%d id:%s, %d points returned %d from group results, totalRead:%d totalReturn:%d", +// pQInfo, pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead, pQInfo->pointsRead, +// pQInfo->pointsInterpo, pQInfo->pointsReturned); + + sem_post(&pQInfo->dataReady); + return; + } + } + } + +// dTrace("QInfo:%p vid:%d sid:%d id:%s, query over, %d points are returned", pQInfo, pMeterObj->vnode, pMeterObj->sid, +// pMeterObj->meterId, pQInfo->pointsRead); + +// vnodePrintQueryStatistics(pSupporter); + sem_post(&pQInfo->dataReady); + return; + } + + // number of points returned during this query + pQuery->rec.pointsRead = 0; + + int64_t st = taosGetTimestampUs(); + + // group by normal column, sliding window query, interval query are handled by interval query processor + if (pQuery->intervalTime != 0 || isGroupbyNormalCol(pQuery->pGroupbyExpr)) { // interval (down sampling operation) +// assert(pQuery->checkBufferInLoop == 0 && pQuery->pointsOffset == pQuery->pointsToRead); + tableIntervalProcessor(pQInfo); + } else { + if (isFixedOutputQuery(pQuery)) { + assert(pQuery->checkBufferInLoop == 0); + + tableFixedOutputProcessor(pQInfo); + } else { // diff/add/multiply/subtract/division + assert(pQuery->checkBufferInLoop == 1); + tableMultiOutputProcessor(pQInfo); + } + } + + // record the total elapsed time + pQInfo->elapsedTime += (taosGetTimestampUs() - st); + + /* check if query is killed or not */ + if (isQueryKilled(pQuery)) { + dTrace("QInfo:%p query is killed", pQInfo); +// pQInfo->over = 1; + } else { +// dTrace("QInfo:%p vid:%d sid:%d id:%s, meter query thread completed, %d points 
are returned", pQInfo, +// pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQuery->pointsRead); + } + + sem_post(&pQInfo->dataReady); +// vnodeDecRefCount(pQInfo); +} + +void qSuperTableQuery(void *pReadMsg) { +// SQInfo *pQInfo = (SQInfo *)pMsg->ahandle; +// +// if (pQInfo == NULL) { +// return; +// } + +// if (pQInfo->killed) { +// vnodeDecRefCount(pQInfo); +// dTrace("QInfo:%p it is already killed, abort", pQInfo); +// return; +// } + +// assert(pQInfo->refCount >= 1); +#if 0 + SQuery *pQuery = &pQInfo->runtimeEnv.pQuery; + pQuery->rec.pointsRead = 0; + + int64_t st = taosGetTimestampUs(); + if (pQuery->intervalTime > 0 || + (isFixedOutputQuery(pQuery) && (!isPointInterpoQuery(pQuery)) && !isGroupbyNormalCol(pQuery->pGroupbyExpr))) { + assert(pQuery->checkBufferInLoop == 0); + vnodeMultiMeterQueryProcessor(pQInfo); + } else { + assert((pQuery->checkBufferInLoop == 1 && pQuery->intervalTime == 0) || isPointInterpoQuery(pQuery) || + isGroupbyNormalCol(pQuery->pGroupbyExpr)); + + vnodeSTableSeqProcessor(pQInfo); + } + + /* record the total elapsed time */ + pQInfo->elapsedTime += (taosGetTimestampUs() - st); + pQuery->status = isQueryKilled(pQuery) ? 
1 : 0; + +// taosInterpoSetStartInfo(&pQInfo->runtimeEnv.interpoInfo, pQuery->pointsRead, +// pQInfo->query.interpoType); + + if (pQuery->rec.pointsRead == 0) { +// pQInfo->over = 1; +// dTrace("QInfo:%p over, %d meters queried, %d points are returned", pQInfo, pSupporter->numOfMeters, +// pQInfo->pointsRead); +// vnodePrintQueryStatistics(pSupporter); + } + + sem_post(&pQInfo->dataReady); +// vnodeDecRefCount(pQInfo); +#endif +} + +static int32_t getColumnIndexInSource(SQueryTableMsg *pQueryTableMsg, SSqlFuncExprMsg *pExprMsg) { + int32_t j = 0; + + while (j < pQueryTableMsg->numOfCols) { + if (pExprMsg->colInfo.colId == pQueryTableMsg->colList[j].colId) { + break; + } + + j += 1; + } + + return j; +} + +bool vnodeValidateExprColumnInfo(SQueryTableMsg *pQueryTableMsg, SSqlFuncExprMsg *pExprMsg) { + int32_t j = getColumnIndexInSource(pQueryTableMsg, pExprMsg); + return j < pQueryTableMsg->numOfCols; +} + +static int32_t validateQueryMeterMsg(SQueryTableMsg *pQueryTableMsg) { + if (pQueryTableMsg->intervalTime < 0) { + dError("qmsg:%p illegal value of aggTimeInterval %" PRId64 "", pQueryTableMsg, pQueryTableMsg->intervalTime); + return -1; + } + + if (pQueryTableMsg->numOfTagsCols < 0 || pQueryTableMsg->numOfTagsCols > TSDB_MAX_TAGS + 1) { + dError("qmsg:%p illegal value of numOfTagsCols %d", pQueryTableMsg, pQueryTableMsg->numOfTagsCols); + return -1; + } + + if (pQueryTableMsg->numOfCols <= 0 || pQueryTableMsg->numOfCols > TSDB_MAX_COLUMNS) { + dError("qmsg:%p illegal value of numOfCols %d", pQueryTableMsg, pQueryTableMsg->numOfCols); + return -1; + } + + if (pQueryTableMsg->numOfTables <= 0) { + dError("qmsg:%p illegal value of numOfTables %d", pQueryTableMsg, pQueryTableMsg->numOfTables); + return -1; + } + + if (pQueryTableMsg->numOfGroupCols < 0) { + dError("qmsg:%p illegal value of numOfGroupbyCols %d", pQueryTableMsg, pQueryTableMsg->numOfGroupCols); + return -1; + } + + if (pQueryTableMsg->numOfOutputCols > TSDB_MAX_COLUMNS || 
pQueryTableMsg->numOfOutputCols <= 0) { + dError("qmsg:%p illegal value of output columns %d", pQueryTableMsg, pQueryTableMsg->numOfOutputCols); + return -1; + } + + if (pQueryTableMsg->tagLength < 0) { + dError("qmsg:%p illegal value of tag length %d", pQueryTableMsg, pQueryTableMsg->tagLength); + return -1; + } + + return 0; +} + +static int32_t convertQueryTableMsg(SQueryTableMsg *pQueryTableMsg, SArray** pTableIdList) { + pQueryTableMsg->vgId = htons(pQueryTableMsg->vgId); + pQueryTableMsg->numOfTables = htonl(pQueryTableMsg->numOfTables); + + pQueryTableMsg->window.skey = htobe64(pQueryTableMsg->window.skey); + pQueryTableMsg->window.ekey = htobe64(pQueryTableMsg->window.ekey); + + pQueryTableMsg->order = htons(pQueryTableMsg->order); + pQueryTableMsg->orderColId = htons(pQueryTableMsg->orderColId); + + pQueryTableMsg->queryType = htons(pQueryTableMsg->queryType); + + pQueryTableMsg->intervalTime = htobe64(pQueryTableMsg->intervalTime); + pQueryTableMsg->slidingTime = htobe64(pQueryTableMsg->slidingTime); + + pQueryTableMsg->numOfTagsCols = htons(pQueryTableMsg->numOfTagsCols); + pQueryTableMsg->numOfCols = htons(pQueryTableMsg->numOfCols); + pQueryTableMsg->numOfOutputCols = htons(pQueryTableMsg->numOfOutputCols); + pQueryTableMsg->numOfGroupCols = htons(pQueryTableMsg->numOfGroupCols); + pQueryTableMsg->tagLength = htons(pQueryTableMsg->tagLength); + + pQueryTableMsg->limit = htobe64(pQueryTableMsg->limit); + pQueryTableMsg->offset = htobe64(pQueryTableMsg->offset); + + pQueryTableMsg->tsOffset = htonl(pQueryTableMsg->tsOffset); + pQueryTableMsg->tsLen = htonl(pQueryTableMsg->tsLen); + pQueryTableMsg->tsNumOfBlocks = htonl(pQueryTableMsg->tsNumOfBlocks); + pQueryTableMsg->tsOrder = htonl(pQueryTableMsg->tsOrder); + + // query msg safety check + if (validateQueryMeterMsg(pQueryTableMsg) != 0) { + return TSDB_CODE_INVALID_QUERY_MSG; + } + + char *pMsg = (char *)(pQueryTableMsg->colList) + sizeof(SColumnInfo) * pQueryTableMsg->numOfCols; + + for (int32_t col = 
0; col < pQueryTableMsg->numOfCols; ++col) { + pQueryTableMsg->colList[col].colId = htons(pQueryTableMsg->colList[col].colId); + pQueryTableMsg->colList[col].type = htons(pQueryTableMsg->colList[col].type); + pQueryTableMsg->colList[col].bytes = htons(pQueryTableMsg->colList[col].bytes); + pQueryTableMsg->colList[col].numOfFilters = htons(pQueryTableMsg->colList[col].numOfFilters); + + assert(pQueryTableMsg->colList[col].type >= TSDB_DATA_TYPE_BOOL && + pQueryTableMsg->colList[col].type <= TSDB_DATA_TYPE_NCHAR); + + int32_t numOfFilters = pQueryTableMsg->colList[col].numOfFilters; + + if (numOfFilters > 0) { + pQueryTableMsg->colList[col].filters = calloc(numOfFilters, sizeof(SColumnFilterInfo)); + } + + for (int32_t f = 0; f < numOfFilters; ++f) { + SColumnFilterInfo *pFilterInfo = (SColumnFilterInfo *)pMsg; + SColumnFilterInfo *pDestFilterInfo = &pQueryTableMsg->colList[col].filters[f]; + + pDestFilterInfo->filterOnBinary = htons(pFilterInfo->filterOnBinary); + + pMsg += sizeof(SColumnFilterInfo); + + if (pDestFilterInfo->filterOnBinary) { + pDestFilterInfo->len = htobe64(pFilterInfo->len); + + pDestFilterInfo->pz = (int64_t)calloc(1, pDestFilterInfo->len + 1); + memcpy((void *)pDestFilterInfo->pz, pMsg, pDestFilterInfo->len + 1); + pMsg += (pDestFilterInfo->len + 1); + } else { + pDestFilterInfo->lowerBndi = htobe64(pFilterInfo->lowerBndi); + pDestFilterInfo->upperBndi = htobe64(pFilterInfo->upperBndi); + } + + pDestFilterInfo->lowerRelOptr = htons(pFilterInfo->lowerRelOptr); + pDestFilterInfo->upperRelOptr = htons(pFilterInfo->upperRelOptr); + } + } + + bool hasArithmeticFunction = false; + + /* + * 1. simple projection query on meters, we only record the pSqlFuncExprs[i].colIdx value + * 2. for complex queries, whole SqlExprs object is required. 
+ */ + pQueryTableMsg->pSqlFuncExprs = (int64_t)malloc(POINTER_BYTES * pQueryTableMsg->numOfOutputCols); + SSqlFuncExprMsg *pExprMsg = (SSqlFuncExprMsg *)pMsg; + + for (int32_t i = 0; i < pQueryTableMsg->numOfOutputCols; ++i) { + ((SSqlFuncExprMsg **)pQueryTableMsg->pSqlFuncExprs)[i] = pExprMsg; + + pExprMsg->colInfo.colIdx = htons(pExprMsg->colInfo.colIdx); + pExprMsg->colInfo.colId = htons(pExprMsg->colInfo.colId); + pExprMsg->colInfo.flag = htons(pExprMsg->colInfo.flag); + + pExprMsg->functionId = htons(pExprMsg->functionId); + pExprMsg->numOfParams = htons(pExprMsg->numOfParams); + + pMsg += sizeof(SSqlFuncExprMsg); + + for (int32_t j = 0; j < pExprMsg->numOfParams; ++j) { + pExprMsg->arg[j].argType = htons(pExprMsg->arg[j].argType); + pExprMsg->arg[j].argBytes = htons(pExprMsg->arg[j].argBytes); + + if (pExprMsg->arg[j].argType == TSDB_DATA_TYPE_BINARY) { + pExprMsg->arg[j].argValue.pz = pMsg; + pMsg += pExprMsg->arg[j].argBytes + 1; // one more for the string terminated char. + } else { + pExprMsg->arg[j].argValue.i64 = htobe64(pExprMsg->arg[j].argValue.i64); + } + } + + if (pExprMsg->functionId == TSDB_FUNC_ARITHM) { + hasArithmeticFunction = true; + } else if (pExprMsg->functionId == TSDB_FUNC_TAG || pExprMsg->functionId == TSDB_FUNC_TAGPRJ || + pExprMsg->functionId == TSDB_FUNC_TAG_DUMMY) { + if (pExprMsg->colInfo.flag != TSDB_COL_TAG) { // ignore the column index check for arithmetic expression. 
+ return TSDB_CODE_INVALID_QUERY_MSG; + } + } else { + if (!vnodeValidateExprColumnInfo(pQueryTableMsg, pExprMsg)) { + return TSDB_CODE_INVALID_QUERY_MSG; + } + } + + pExprMsg = (SSqlFuncExprMsg *)pMsg; + } + + pQueryTableMsg->colNameLen = htonl(pQueryTableMsg->colNameLen); + if (hasArithmeticFunction) { // column name array + assert(pQueryTableMsg->colNameLen > 0); + pQueryTableMsg->colNameList = (int64_t)pMsg; + pMsg += pQueryTableMsg->colNameLen; + } + + *pTableIdList = taosArrayInit(pQueryTableMsg->numOfTables, sizeof(STableIdInfo)); + + STableIdInfo* pTableIdInfo = (STableIdInfo *)pMsg; + pTableIdInfo->sid = htonl(pTableIdInfo->sid); + pTableIdInfo->uid = htobe64(pTableIdInfo->uid); + pTableIdInfo->key = htobe64(pTableIdInfo->key); + + taosArrayPush(*pTableIdList, pTableIdInfo); + pMsg += sizeof(STableIdInfo); + + for (int32_t j = 1; j < pQueryTableMsg->numOfTables; ++j) { + pTableIdInfo = (STableIdInfo *)pMsg; + + pTableIdInfo->sid = htonl(pTableIdInfo->sid); + pTableIdInfo->uid = htobe64(pTableIdInfo->uid); + pTableIdInfo->key = htobe64(pTableIdInfo->key); + + taosArrayPush(*pTableIdList, pTableIdInfo); + pMsg += sizeof(STableIdInfo); + } + + if (pQueryTableMsg->numOfGroupCols > 0 || pQueryTableMsg->numOfTagsCols > 0) { // group by tag columns + pQueryTableMsg->pTagSchema = (uint64_t)pMsg; + SSchema *pTagSchema = (SSchema *)pQueryTableMsg->pTagSchema; + pMsg += sizeof(SSchema) * pQueryTableMsg->numOfTagsCols; + + if (pQueryTableMsg->numOfGroupCols > 0) { + pQueryTableMsg->groupbyTagIds = (uint64_t) & (pTagSchema[pQueryTableMsg->numOfTagsCols]); + } else { + pQueryTableMsg->groupbyTagIds = 0; + } + pQueryTableMsg->orderByIdx = htons(pQueryTableMsg->orderByIdx); + pQueryTableMsg->orderType = htons(pQueryTableMsg->orderType); + + pMsg += sizeof(SColIndexEx) * pQueryTableMsg->numOfGroupCols; + } else { + pQueryTableMsg->pTagSchema = 0; + pQueryTableMsg->groupbyTagIds = 0; + } + + pQueryTableMsg->interpoType = htons(pQueryTableMsg->interpoType); + if 
(pQueryTableMsg->interpoType != TSDB_INTERPO_NONE) { + pQueryTableMsg->defaultVal = (uint64_t)(pMsg); + + int64_t *v = (int64_t *)pMsg; + for (int32_t i = 0; i < pQueryTableMsg->numOfOutputCols; ++i) { + v[i] = htobe64(v[i]); + } + } + + dTrace("qmsg:%p query on %d meter(s), qrange:%" PRId64 "-%" PRId64 + ", numOfGroupbyTagCols:%d, numOfTagCols:%d, timestamp order:%d, " + "tags order:%d, tags order col:%d, numOfOutputCols:%d, numOfCols:%d, interval:%" PRId64 + ", fillType:%d, comptslen:%d, limit:%" PRId64 + ", " + "offset:%" PRId64, + pQueryTableMsg, pQueryTableMsg->numOfTables, pQueryTableMsg->window.skey, pQueryTableMsg->window.ekey, + pQueryTableMsg->numOfGroupCols, pQueryTableMsg->numOfTagsCols, pQueryTableMsg->order, + pQueryTableMsg->orderType, pQueryTableMsg->orderByIdx, pQueryTableMsg->numOfOutputCols, + pQueryTableMsg->numOfCols, pQueryTableMsg->intervalTime, pQueryTableMsg->interpoType, pQueryTableMsg->tsLen, + pQueryTableMsg->limit, pQueryTableMsg->offset); + + return 0; +} + +static int32_t buildAirthmeticExprFromMsg(SSqlFunctionExpr *pExpr, SQueryTableMsg *pQueryMsg) { + SSqlBinaryExprInfo *pBinaryExprInfo = &pExpr->binExprInfo; + SColumnInfo * pColMsg = pQueryMsg->colList; +#if 0 + tSQLBinaryExpr* pBinExpr = NULL; + SSchema* pSchema = toSchema(pQueryMsg, pColMsg, pQueryMsg->numOfCols); + + dTrace("qmsg:%p create binary expr from string:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz); + tSQLBinaryExprFromString(&pBinExpr, pSchema, pQueryMsg->numOfCols, pExpr->pBase.arg[0].argValue.pz, + pExpr->pBase.arg[0].argBytes); + + if (pBinExpr == NULL) { + dError("qmsg:%p failed to create arithmetic expression string from:%s", pQueryMsg, pExpr->pBase.arg[0].argValue.pz); + return TSDB_CODE_APP_ERROR; + } + + pBinaryExprInfo->pBinExpr = pBinExpr; + + int32_t num = 0; + int16_t ids[TSDB_MAX_COLUMNS] = {0}; + + tSQLBinaryExprTrv(pBinExpr, &num, ids); + qsort(ids, num, sizeof(int16_t), id_compar); + + int32_t i = 0, j = 0; + + while (i < num && j < num) { + if 
(ids[i] == ids[j]) { + j++; + } else { + ids[++i] = ids[j++]; + } + } + assert(i <= num); + + // there may be duplicated referenced columns. + num = i + 1; + pBinaryExprInfo->pReqColumns = malloc(sizeof(SColIndexEx) * num); + + for (int32_t k = 0; k < num; ++k) { + SColIndexEx* pColIndex = &pBinaryExprInfo->pReqColumns[k]; + pColIndex->colId = ids[k]; + } + + pBinaryExprInfo->numOfCols = num; + free(pSchema); +#endif + + return TSDB_CODE_SUCCESS; +} + +static int32_t createSqlFunctionExprFromMsg(SQueryTableMsg *pQueryMsg, SSqlFunctionExpr **pSqlFuncExpr) { + *pSqlFuncExpr = NULL; + int32_t code = TSDB_CODE_SUCCESS; + + SSqlFunctionExpr *pExprs = (SSqlFunctionExpr *)calloc(1, sizeof(SSqlFunctionExpr) * pQueryMsg->numOfOutputCols); + if (pExprs == NULL) { + tfree(pQueryMsg->pSqlFuncExprs); + return TSDB_CODE_SERV_OUT_OF_MEMORY; + } + + bool isSuperTable = QUERY_IS_STABLE_QUERY(pQueryMsg->queryType); + int16_t tagLen = 0; + + SSchema *pTagSchema = (SSchema *)pQueryMsg->pTagSchema; + for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) { + pExprs[i].pBase = *((SSqlFuncExprMsg **)pQueryMsg->pSqlFuncExprs)[i]; + pExprs[i].resBytes = 0; + + int16_t type = 0; + int16_t bytes = 0; + + SColIndexEx *pColumnIndexExInfo = &pExprs[i].pBase.colInfo; + + // tag column schema is kept in pQueryMsg->pColumnModel + if (TSDB_COL_IS_TAG(pColumnIndexExInfo->flag)) { + if (pColumnIndexExInfo->colIdx >= pQueryMsg->numOfTagsCols) { + tfree(pExprs); + + return TSDB_CODE_INVALID_QUERY_MSG; + } + + type = pTagSchema[pColumnIndexExInfo->colIdx].type; + bytes = pTagSchema[pColumnIndexExInfo->colIdx].bytes; + + } else { // parse the arithmetic expression + if (pExprs[i].pBase.functionId == TSDB_FUNC_ARITHM) { + code = buildAirthmeticExprFromMsg(&pExprs[i], pQueryMsg); + + if (code != TSDB_CODE_SUCCESS) { + tfree(pExprs); + return code; + } + + type = TSDB_DATA_TYPE_DOUBLE; + bytes = tDataTypeDesc[type].nSize; + } else { // parse the normal column + int32_t j = 
getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase); + assert(j < pQueryMsg->numOfCols); + + SColumnInfo* pCol = &pQueryMsg->colList[j]; + type = pCol->type; + bytes = pCol->bytes; + } + } + + int32_t param = pExprs[i].pBase.arg[0].argValue.i64; + if (getResultDataInfo(type, bytes, pExprs[i].pBase.functionId, param, &pExprs[i].resType, &pExprs[i].resBytes, + &pExprs[i].interResBytes, 0, isSuperTable) != TSDB_CODE_SUCCESS) { + tfree(pExprs); + return TSDB_CODE_INVALID_QUERY_MSG; + } + + if (pExprs[i].pBase.functionId == TSDB_FUNC_TAG_DUMMY || pExprs[i].pBase.functionId == TSDB_FUNC_TS_DUMMY) { + tagLen += pExprs[i].resBytes; + } + assert(isValidDataType(pExprs[i].resType, pExprs[i].resBytes)); + } + + // get the correct result size for top/bottom query, according to the number of tags columns in selection clause + + // TODO refactor + for (int32_t i = 0; i < pQueryMsg->numOfOutputCols; ++i) { + pExprs[i].pBase = *((SSqlFuncExprMsg **)pQueryMsg->pSqlFuncExprs)[i]; + int16_t functId = pExprs[i].pBase.functionId; + if (functId == TSDB_FUNC_TOP || functId == TSDB_FUNC_BOTTOM) { + int32_t j = getColumnIndexInSource(pQueryMsg, &pExprs[i].pBase); + assert(j < pQueryMsg->numOfCols); + + SColumnInfo *pCol = &pQueryMsg->colList[j]; + int16_t type = pCol->type; + int16_t bytes = pCol->bytes; + + int32_t ret = + getResultDataInfo(type, bytes, pExprs[i].pBase.functionId, pExprs[i].pBase.arg[0].argValue.i64, + &pExprs[i].resType, &pExprs[i].resBytes, &pExprs[i].interResBytes, tagLen, isSuperTable); + assert(ret == TSDB_CODE_SUCCESS); + } + } + + tfree(pQueryMsg->pSqlFuncExprs); + *pSqlFuncExpr = pExprs; + + return TSDB_CODE_SUCCESS; +} + +static SSqlGroupbyExpr *createGroupbyExprFromMsg(SQueryTableMsg *pQueryMsg, int32_t *code) { + if (pQueryMsg->numOfGroupCols == 0) { + return NULL; + } + + // using group by tag columns + SSqlGroupbyExpr *pGroupbyExpr = + (SSqlGroupbyExpr *)malloc(sizeof(SSqlGroupbyExpr) + pQueryMsg->numOfGroupCols * sizeof(SColIndexEx)); + if (pGroupbyExpr == 
NULL) { + *code = TSDB_CODE_SERV_OUT_OF_MEMORY; + return NULL; + } + + SColIndexEx *pGroupbyColInfo = (SColIndexEx *)pQueryMsg->groupbyTagIds; + + pGroupbyExpr->numOfGroupCols = pQueryMsg->numOfGroupCols; + pGroupbyExpr->orderType = pQueryMsg->orderType; + pGroupbyExpr->orderIndex = pQueryMsg->orderByIdx; + + memcpy(pGroupbyExpr->columnInfo, pGroupbyColInfo, sizeof(SColIndexEx) * pGroupbyExpr->numOfGroupCols); + return pGroupbyExpr; +} + +static int32_t vnodeCreateFilterInfo(void *pQInfo, SQuery *pQuery) { + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { + if (pQuery->colList[i].info.numOfFilters > 0) { + pQuery->numOfFilterCols++; + } + } + + if (pQuery->numOfFilterCols == 0) { + return TSDB_CODE_SUCCESS; + } + + pQuery->pFilterInfo = calloc(1, sizeof(SSingleColumnFilterInfo) * pQuery->numOfFilterCols); + + for (int32_t i = 0, j = 0; i < pQuery->numOfCols; ++i) { + if (pQuery->colList[i].info.numOfFilters > 0) { + SSingleColumnFilterInfo *pFilterInfo = &pQuery->pFilterInfo[j]; + + memcpy(&pFilterInfo->info, &pQuery->colList[i], sizeof(SColumnInfoEx)); + pFilterInfo->info.info.filters = NULL; + + pFilterInfo->numOfFilters = pQuery->colList[i].info.numOfFilters; + pFilterInfo->pFilters = calloc(pFilterInfo->numOfFilters, sizeof(SColumnFilterElem)); + + for (int32_t f = 0; f < pFilterInfo->numOfFilters; ++f) { + SColumnFilterElem *pSingleColFilter = &pFilterInfo->pFilters[f]; + pSingleColFilter->filterInfo = pQuery->colList[i].info.filters[f]; + + int32_t lower = pSingleColFilter->filterInfo.lowerRelOptr; + int32_t upper = pSingleColFilter->filterInfo.upperRelOptr; + + if (lower == TSDB_RELATION_INVALID && upper == TSDB_RELATION_INVALID) { + dError("QInfo:%p invalid filter info", pQInfo); + return TSDB_CODE_INVALID_QUERY_MSG; + } + + int16_t type = pQuery->colList[i].info.type; + int16_t bytes = pQuery->colList[i].info.bytes; + + __filter_func_t *rangeFilterArray = NULL; // vnodeGetRangeFilterFuncArray(type); + __filter_func_t *filterArray = NULL; // 
vnodeGetValueFilterFuncArray(type); + + if (rangeFilterArray == NULL && filterArray == NULL) { + dError("QInfo:%p failed to get filter function, invalid data type:%d", pQInfo, type); + return TSDB_CODE_INVALID_QUERY_MSG; + } + + if ((lower == TSDB_RELATION_LARGE_EQUAL || lower == TSDB_RELATION_LARGE) && + (upper == TSDB_RELATION_LESS_EQUAL || upper == TSDB_RELATION_LESS)) { + if (lower == TSDB_RELATION_LARGE_EQUAL) { + if (upper == TSDB_RELATION_LESS_EQUAL) { + pSingleColFilter->fp = rangeFilterArray[4]; + } else { + pSingleColFilter->fp = rangeFilterArray[2]; + } + } else { + if (upper == TSDB_RELATION_LESS_EQUAL) { + pSingleColFilter->fp = rangeFilterArray[3]; + } else { + pSingleColFilter->fp = rangeFilterArray[1]; + } + } + } else { // set callback filter function + if (lower != TSDB_RELATION_INVALID) { + pSingleColFilter->fp = filterArray[lower]; + + if (upper != TSDB_RELATION_INVALID) { + dError("pQInfo:%p failed to get filter function, invalid filter condition", pQInfo, type); + return TSDB_CODE_INVALID_QUERY_MSG; + } + } else { + pSingleColFilter->fp = filterArray[upper]; + } + } + assert(pSingleColFilter->fp != NULL); + pSingleColFilter->bytes = bytes; + } + + j++; + } + } + + return TSDB_CODE_SUCCESS; +} + +static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pExprs, SArray* pTableIdList) { + SQInfo *pQInfo = (SQInfo *)calloc(1, sizeof(SQInfo)); + if (pQInfo == NULL) { + goto _clean_memory; + } + + SQuery *pQuery = calloc(1, sizeof(SQuery)); + pQInfo->runtimeEnv.pQuery = pQuery; + + int16_t numOfCols = pQueryMsg->numOfCols; + int16_t numOfOutputCols = pQueryMsg->numOfOutputCols; + + pQuery->numOfCols = numOfCols; + pQuery->numOfOutputCols = numOfOutputCols; + + pQuery->limit.limit = pQueryMsg->limit; + pQuery->limit.offset = pQueryMsg->offset; + + pQuery->order.order = pQueryMsg->order; + pQuery->order.orderColId = pQueryMsg->orderColId; + + pQuery->pSelectExpr = pExprs; + pQuery->pGroupbyExpr = 
pGroupbyExpr; + + pQuery->intervalTime = pQueryMsg->intervalTime; + + pQuery->slidingTime = pQueryMsg->slidingTime; + pQuery->slidingTimeUnit = pQueryMsg->slidingTimeUnit; + + pQuery->interpoType = pQueryMsg->interpoType; + + pQuery->colList = calloc(1, sizeof(SSingleColumnFilterInfo) * numOfCols); + if (pQuery->colList == NULL) { + goto _clean_memory; + } + + for (int16_t i = 0; i < numOfCols; ++i) { + pQuery->colList[i].info = pQueryMsg->colList[i]; +// SColumnInfo *pColInfo = &pQuery->colList[i].data; +// pColInfo->filters = NULL; +// if (colList[i].numOfFilters > 0) { +// pColInfo->filters = calloc(1, colList[i].numOfFilters * sizeof(SColumnFilterInfo)); +// +// for (int32_t j = 0; j < colList[i].numOfFilters; ++j) { +// tscColumnFilterInfoCopy(&pColInfo->filters[j], &colList[i].filters[j]); +// } +// } else { +// pQuery->colList[i].data.filters = NULL; +// } + } + + // calculate the result row size + for (int16_t col = 0; col < numOfOutputCols; ++col) { + assert(pExprs[col].resBytes > 0); + pQuery->rowSize += pExprs[col].resBytes; + } + + int32_t ret = vnodeCreateFilterInfo(pQInfo, pQuery); + if (ret != TSDB_CODE_SUCCESS) { + goto _clean_memory; + } + + // prepare the result buffer + pQuery->sdata = (SData **)calloc(pQuery->numOfOutputCols, sizeof(SData *)); + if (pQuery->sdata == NULL) { + goto _clean_memory; + } + + // set the output buffer capacity + pQuery->capacity = 4096; + for (int32_t col = 0; col < pQuery->numOfOutputCols; ++col) { + assert(pExprs[col].interResBytes >= pExprs[col].resBytes); + + // allocate additional memory for interResults that are usually larger then final results + size_t size = (pQuery->capacity + 1) * pExprs[col].resBytes + pExprs[col].interResBytes + sizeof(SData); + pQuery->sdata[col] = (SData *)calloc(1, size); + if (pQuery->sdata[col] == NULL) { + goto _clean_memory; + } + } + + if (pQuery->interpoType != TSDB_INTERPO_NONE) { + pQuery->defaultVal = malloc(sizeof(int64_t) * pQuery->numOfOutputCols); + if (pQuery->defaultVal 
== NULL) { + goto _clean_memory; + } + + // the first column is the timestamp + memcpy(pQuery->defaultVal, (char *)pQueryMsg->defaultVal, pQuery->numOfOutputCols * sizeof(int64_t)); + } + + // to make sure third party won't overwrite this structure + pQInfo->signature = (uint64_t)pQInfo; + pQInfo->pTableIdList = pTableIdList; + + pQuery->pos = -1; + // dTrace("vid:%d sid:%d meterId:%s, QInfo is allocated:%p", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, pQInfo); + + return pQInfo; + +_clean_memory: + tfree(pQuery->defaultVal); + + if (pQuery->sdata != NULL) { + for (int16_t col = 0; col < pQuery->numOfOutputCols; ++col) { + tfree(pQuery->sdata[col]); + } + } + + tfree(pQuery->sdata); + tfree(pQuery->pFilterInfo); + tfree(pQuery->colList); + + tfree(pExprs); + tfree(pGroupbyExpr); + + tfree(pQInfo); + + return NULL; +} + +bool isQInfoValid(void *param) { + SQInfo *pQInfo = (SQInfo *)param; + if (pQInfo == NULL) { + return false; + } + + /* + * pQInfo->signature may be changed by another thread, so we assign value of signature + * into local variable, then compare by using local variable + */ + uint64_t sig = pQInfo->signature; + return (sig == (uint64_t)pQInfo); +} + +void vnodeFreeQInfo(SQInfo* pQInfo, bool decQueryRef) { + if (!isQInfoValid(pQInfo)) { + return; + } + + pQInfo->killed = 1; + dTrace("QInfo:%p start to free SQInfo", pQInfo); + + if (decQueryRef) { + vnodeDecMeterRefcnt(pQInfo); + } + + SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + + for (int col = 0; col < pQuery->numOfOutputCols; ++col) { + tfree(pQuery->sdata[col]); + } + +// for (int col = 0; col < pQuery->numOfCols; ++col) { +// vnodeFreeColumnInfo(&pQuery->colList[col].data); +// } +// +// if (pQuery->colList[0].colIdx != PRIMARYKEY_TIMESTAMP_COL_INDEX) { +// tfree(pQuery->tsData); +// } + + sem_destroy(&(pQInfo->dataReady)); + vnodeQueryFreeQInfoEx(pQInfo); + + for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) { + SSingleColumnFilterInfo *pColFilter = &pQuery->pFilterInfo[i]; + if 
(pColFilter->numOfFilters > 0) { + tfree(pColFilter->pFilters); + } + } + + tfree(pQuery->pFilterInfo); + tfree(pQuery->colList); + tfree(pQuery->sdata); + + if (pQuery->pSelectExpr != NULL) { + for (int32_t i = 0; i < pQuery->numOfOutputCols; ++i) { + SSqlBinaryExprInfo *pBinExprInfo = &pQuery->pSelectExpr[i].binExprInfo; + + if (pBinExprInfo->numOfCols > 0) { + tfree(pBinExprInfo->pReqColumns); + tSQLBinaryExprDestroy(&pBinExprInfo->pBinExpr, NULL); + } + } + + tfree(pQuery->pSelectExpr); + } + + if (pQuery->defaultVal != NULL) { + tfree(pQuery->defaultVal); + } + + tfree(pQuery->pGroupbyExpr); + tfree(pQuery); + +// dTrace("QInfo:%p vid:%d sid:%d meterId:%s, QInfo is freed", pQInfo, pObj->vnode, pObj->sid, pObj->meterId); + + //destroy signature, in order to avoid the query process pass the object safety check + memset(pQInfo, 0, sizeof(SQInfo)); + tfree(pQInfo); +} + +static int32_t createQInfo(SQueryTableMsg *pQueryMsg, SSqlGroupbyExpr *pGroupbyExpr, SSqlFunctionExpr *pSqlExprs, + SArray* pTableIdList, SQInfo **pQInfo) { + int32_t code = TSDB_CODE_SUCCESS; + + (*pQInfo) = createQInfoImpl(pQueryMsg, pGroupbyExpr, pSqlExprs, pTableIdList); + if (pQInfo == NULL) { + code = TSDB_CODE_SERV_OUT_OF_MEMORY; + goto _error; + } + + SQuery* pQuery = (*pQInfo)->runtimeEnv.pQuery; + dTrace("qmsg:%p create QInfo:%p, QInfo created", pQueryMsg, pQInfo); + +// STableIdInfo **pTableIdList = (STableIdInfo **)pQueryMsg->pSidExtInfo; +// if (pTableIdList != NULL && pTableIdList[0]->key > 0) { +// pQuery->window.skey = pTableIdList[0]->key; +// } else { + pQuery->window.skey = pQueryMsg->window.skey; + pQuery->window.ekey = pQueryMsg->window.ekey; + + pQuery->lastKey = pQuery->window.skey; + + if (sem_init(&(*pQInfo)->dataReady, 0, 0) != 0) { + // dError("QInfo:%p vid:%d sid:%d meterId:%s, init dataReady sem failed, reason:%s", pQInfo, pMeterObj->vnode, + // pMeterObj->sid, pMeterObj->meterId, strerror(errno)); + code = TSDB_CODE_APP_ERROR; + goto _error; + } + + 
vnodeParametersSafetyCheck(pQuery); + + STSBuf *pTSBuf = NULL; + if (pQueryMsg->tsLen > 0) { // open new file to save the result + char *tsBlock = (char *)pQueryMsg + pQueryMsg->tsOffset; + pTSBuf = tsBufCreateFromCompBlocks(tsBlock, pQueryMsg->tsNumOfBlocks, pQueryMsg->tsLen, pQueryMsg->tsOrder); + + tsBufResetPos(pTSBuf); + tsBufNextPos(pTSBuf); + } + + if ((code = vnodeQueryTablePrepare(*pQInfo, pTSBuf)) != TSDB_CODE_SUCCESS) { + goto _error; + } + + // if (pQInfo->over == 1) { + // vnodeAddRefCount(pQInfo); // for retrieve procedure + // return pQInfo; + // } + + // dTrace("QInfo:%p set query flag and prepare runtime environment completed, ref:%d, wait for schedule", pQInfo, + // pQInfo->refCount); + return code; + +_error: + // table query ref will be decrease during error handling + vnodeFreeQInfo(*pQInfo, false); + return code; +} + +int32_t qCreateQueryInfo(SQueryTableMsg *pQueryTableMsg, SQInfo **pQInfo) { + assert(pQueryTableMsg != NULL); + + int32_t code = TSDB_CODE_SUCCESS; + SArray* pTableIdList = NULL; + if ((code = convertQueryTableMsg(pQueryTableMsg, &pTableIdList)) != TSDB_CODE_SUCCESS) { + return code; + } + + if (pQueryTableMsg->numOfTables <= 0) { + dError("Invalid number of tables to query, numOfTables:%d", pQueryTableMsg->numOfTables); + code = TSDB_CODE_INVALID_QUERY_MSG; + goto _query_over; + } + + // todo check vnode status + if (pTableIdList == NULL || taosArrayGetSize(pTableIdList) == 0) { + dError("qmsg:%p, SQueryTableMsg wrong format", pQueryTableMsg); + code = TSDB_CODE_INVALID_QUERY_MSG; + goto _query_over; + } + + SSqlFunctionExpr *pExprs = NULL; + if ((code = createSqlFunctionExprFromMsg(pQueryTableMsg, &pExprs)) != TSDB_CODE_SUCCESS) { + goto _query_over; + } + + SSqlGroupbyExpr *pGroupbyExpr = createGroupbyExprFromMsg(pQueryTableMsg, &code); + if ((pGroupbyExpr == NULL && pQueryTableMsg->numOfGroupCols != 0) || code != TSDB_CODE_SUCCESS) { + goto _query_over; + } + + if (QUERY_IS_STABLE_QUERY(pQueryTableMsg->queryType)) { + // 
pObj->qhandle = vnodeQueryOnMultiMeters(pMeterObjList, pGroupbyExpr, pExprs, pQueryTableMsg, &code); + } else { + code = createQInfo(pQueryTableMsg, pGroupbyExpr, pExprs, pTableIdList, pQInfo); + } + +_query_over: + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(pTableIdList); + } + + // if failed to add ref for all meters in this query, abort current query + // if (code != TSDB_CODE_SUCCESS) { + // vnodeDecQueryRefCount(pQueryTableMsg, pMeterObjList, incNumber); + // } + // + // tfree(pQueryTableMsg->pSqlFuncExprs); + // tfree(pMeterObjList); + // ret = vnodeSendQueryRspMsg(pObj, code, pObj->qhandle); + // + // tfree(pQueryTableMsg->pSidExtInfo); + // for(int32_t i = 0; i < pQueryTableMsg->numOfCols; ++i) { + // vnodeFreeColumnInfo(&pQueryTableMsg->colList[i]); + // } + // + // atomic_fetch_add_32(&vnodeSelectReqNum, 1); + return TSDB_CODE_SUCCESS; +} + +int32_t qRetrieveQueryResultInfo(SQInfo* pQInfo, int32_t *numOfRows, int32_t* rowsize) { + if (pQInfo == NULL || !isQInfoValid(pQInfo)) { + return TSDB_CODE_INVALID_QHANDLE; + } + + if (pQInfo->killed) { + dTrace("QInfo:%p query is killed, code:%d", pQInfo, pQInfo->code); + if (pQInfo->code == TSDB_CODE_SUCCESS) { + return TSDB_CODE_QUERY_CANCELLED; + } else { // in case of not TSDB_CODE_SUCCESS, return the code to client + return abs(pQInfo->code); + } + } + + sem_wait(&pQInfo->dataReady); + + SQuery* pQuery = pQInfo->runtimeEnv.pQuery; + +// *numOfRows = pQInfo->rec.pointsRead; +// *rowsize = pQuery->rowSize; + *numOfRows = 1; + +// dTrace("QInfo:%p, retrieve data info completed, precision:%d, rowsize:%d, rows:%d, code:%d", pQInfo, *timePrec, +// *rowsize, *numOfRows, pQInfo->code); + + if (pQInfo->code < 0) { // less than 0 means there are error existed. 
+ return -pQInfo->code; + } +} diff --git a/src/query/src/queryUtil.c b/src/query/src/queryUtil.c new file mode 100644 index 0000000000000000000000000000000000000000..c970363c01fc148a58da8dd78cf0a43c255297cc --- /dev/null +++ b/src/query/src/queryUtil.c @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "os.h" + +#include "hash.h" +#include "taosmsg.h" +#include "qextbuffer.h" +#include "ttime.h" + +#include "qinterpolation.h" +//#include "tscJoinProcess.h" +#include "ttime.h" + +#include "queryExecutor.h" + +int32_t initWindowResInfo(SWindowResInfo *pWindowResInfo, SQueryRuntimeEnv *pRuntimeEnv, int32_t size, + int32_t threshold, int16_t type) { + pWindowResInfo->capacity = size; + pWindowResInfo->threshold = threshold; + + pWindowResInfo->type = type; + + _hash_fn_t fn = taosGetDefaultHashFunction(type); + pWindowResInfo->hashList = taosHashInit(threshold, fn, false); + + pWindowResInfo->curIndex = -1; + pWindowResInfo->size = 0; + + // use the pointer arraylist + pWindowResInfo->pResult = calloc(threshold, sizeof(SWindowResult)); + for (int32_t i = 0; i < pWindowResInfo->capacity; ++i) { + SPosInfo posInfo = {-1, -1}; + createQueryResultInfo(pRuntimeEnv->pQuery, &pWindowResInfo->pResult[i], pRuntimeEnv->stableQuery, &posInfo); + } + + return TSDB_CODE_SUCCESS; +} + +void destroyTimeWindowRes(SWindowResult *pWindowRes, int32_t nOutputCols) { + if (pWindowRes == NULL) { + return; + } + + for (int32_t i = 
0; i < nOutputCols; ++i) { + free(pWindowRes->resultInfo[i].interResultBuf); + } + + free(pWindowRes->resultInfo); +} + +void cleanupTimeWindowInfo(SWindowResInfo *pWindowResInfo, int32_t numOfCols) { + if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0) { + assert(pWindowResInfo->hashList == NULL && pWindowResInfo->pResult == NULL); + return; + } + + for (int32_t i = 0; i < pWindowResInfo->capacity; ++i) { + SWindowResult *pResult = &pWindowResInfo->pResult[i]; + destroyTimeWindowRes(pResult, numOfCols); + } + + taosHashCleanup(pWindowResInfo->hashList); + tfree(pWindowResInfo->pResult); +} + +void resetTimeWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo) { + if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0) { + return; + } + + for (int32_t i = 0; i < pWindowResInfo->size; ++i) { + SWindowResult *pWindowRes = &pWindowResInfo->pResult[i]; + clearTimeWindowResBuf(pRuntimeEnv, pWindowRes); + } + + pWindowResInfo->curIndex = -1; + taosHashCleanup(pWindowResInfo->hashList); + pWindowResInfo->size = 0; + + _hash_fn_t fn = taosGetDefaultHashFunction(pWindowResInfo->type); + pWindowResInfo->hashList = taosHashInit(pWindowResInfo->capacity, fn, false); + + pWindowResInfo->startTime = 0; + pWindowResInfo->prevSKey = 0; +} + +void clearFirstNTimeWindow(SQueryRuntimeEnv *pRuntimeEnv, int32_t num) { + SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0 || pWindowResInfo->size == 0 || num == 0) { + return; + } + + int32_t numOfClosed = numOfClosedTimeWindow(pWindowResInfo); + assert(num >= 0 && num <= numOfClosed); + + for (int32_t i = 0; i < num; ++i) { + SWindowResult *pResult = &pWindowResInfo->pResult[i]; + if (pResult->status.closed) { // remove the window slot from hash table + taosHashRemove(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); + } else { + break; + } + } + + int32_t remain = pWindowResInfo->size - num; + + // clear 
all the closed windows from the window list + for (int32_t k = 0; k < remain; ++k) { + copyTimeWindowResBuf(pRuntimeEnv, &pWindowResInfo->pResult[k], &pWindowResInfo->pResult[num + k]); + } + + // move the unclosed window in the front of the window list + for (int32_t k = remain; k < pWindowResInfo->size; ++k) { + SWindowResult *pWindowRes = &pWindowResInfo->pResult[k]; + clearTimeWindowResBuf(pRuntimeEnv, pWindowRes); + } + + pWindowResInfo->size = remain; + + for (int32_t k = 0; k < pWindowResInfo->size; ++k) { + SWindowResult *pResult = &pWindowResInfo->pResult[k]; + int32_t *p = (int32_t *)taosHashGet(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); + int32_t v = (*p - num); + assert(v >= 0 && v <= pWindowResInfo->size); + + taosHashPut(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE, (char *)&v, + sizeof(int32_t)); + } + + pWindowResInfo->curIndex = -1; +} + +void clearClosedTimeWindow(SQueryRuntimeEnv *pRuntimeEnv) { + SWindowResInfo *pWindowResInfo = &pRuntimeEnv->windowResInfo; + if (pWindowResInfo == NULL || pWindowResInfo->capacity == 0 || pWindowResInfo->size == 0) { + return; + } + + int32_t numOfClosed = numOfClosedTimeWindow(pWindowResInfo); + clearFirstNTimeWindow(pRuntimeEnv, numOfClosed); +} + +int32_t numOfClosedTimeWindow(SWindowResInfo *pWindowResInfo) { + int32_t i = 0; + while (i < pWindowResInfo->size && pWindowResInfo->pResult[i].status.closed) { + ++i; + } + + return i; +} + +void closeAllTimeWindow(SWindowResInfo *pWindowResInfo) { + assert(pWindowResInfo->size >= 0 && pWindowResInfo->capacity >= pWindowResInfo->size); + + for (int32_t i = 0; i < pWindowResInfo->size; ++i) { + if (pWindowResInfo->pResult[i].status.closed) { + continue; + } + + pWindowResInfo->pResult[i].status.closed = true; + } +} + +/* + * remove the results that are not the FIRST time window that spreads beyond the + * the last qualified time stamp in case of sliding query, which the sliding time is not equalled 
to the interval time + */ +void removeRedundantWindow(SWindowResInfo *pWindowResInfo, TSKEY lastKey, int32_t order) { + assert(pWindowResInfo->size >= 0 && pWindowResInfo->capacity >= pWindowResInfo->size); + + int32_t i = 0; + while (i < pWindowResInfo->size && + ((pWindowResInfo->pResult[i].window.ekey < lastKey && order == QUERY_ASC_FORWARD_STEP) || + (pWindowResInfo->pResult[i].window.skey > lastKey && order == QUERY_DESC_FORWARD_STEP))) { + ++i; + } + + // assert(i < pWindowResInfo->size); + if (i < pWindowResInfo->size) { + pWindowResInfo->size = (i + 1); + } +} + +SWindowResult *getWindowResult(SWindowResInfo *pWindowResInfo, int32_t slot) { + assert(pWindowResInfo != NULL && slot >= 0 && slot < pWindowResInfo->size); + return &pWindowResInfo->pResult[slot]; +} + +bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot) { + return (getWindowResult(pWindowResInfo, slot)->status.closed == true); +} + +int32_t curTimeWindow(SWindowResInfo *pWindowResInfo) { + assert(pWindowResInfo->curIndex >= 0 && pWindowResInfo->curIndex < pWindowResInfo->size); + return pWindowResInfo->curIndex; +} + +void closeTimeWindow(SWindowResInfo *pWindowResInfo, int32_t slot) { + getWindowResult(pWindowResInfo, slot)->status.closed = true; +} + +void clearTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindowRes) { + if (pWindowRes == NULL) { + return; + } + + for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutputCols; ++i) { + SResultInfo *pResultInfo = &pWindowRes->resultInfo[i]; + + char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes); + size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].resBytes; + memset(s, 0, size); + + resetResultInfo(pResultInfo); + } + + pWindowRes->numOfRows = 0; + // pWindowRes->nAlloc = 0; + pWindowRes->pos = (SPosInfo){-1, -1}; + pWindowRes->status.closed = false; + pWindowRes->window = (STimeWindow){0, 0}; +} + +/** + * The source window result pos attribution of the source window result does not assign to the 
destination, + * since the attribute of "Pos" is bound to each window result when the window result is created in the + * disk-based result buffer. + */ +void copyTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *dst, const SWindowResult *src) { + dst->numOfRows = src->numOfRows; + // dst->nAlloc = src->nAlloc; + dst->window = src->window; + dst->status = src->status; + + int32_t nOutputCols = pRuntimeEnv->pQuery->numOfOutputCols; + + for (int32_t i = 0; i < nOutputCols; ++i) { + SResultInfo *pDst = &dst->resultInfo[i]; + SResultInfo *pSrc = &src->resultInfo[i]; + + char *buf = pDst->interResultBuf; + memcpy(pDst, pSrc, sizeof(SResultInfo)); + pDst->interResultBuf = buf; // restore the allocated buffer + + // copy the result info struct + memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen); + + // copy the output buffer data from src to dst, the position info keep unchanged + char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst); + char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src); + size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].resBytes; + + memcpy(dstBuf, srcBuf, s); + } +} + diff --git a/src/query/src/tvariant.c b/src/query/src/tvariant.c index c9168ab252bd39f58ddcf0e09f676ff585c27c56..f0addb733bf496e1f65af8a744f4584e6bb2396c 100644 --- a/src/query/src/tvariant.c +++ b/src/query/src/tvariant.c @@ -13,17 +13,16 @@ * along with this program. If not, see . 
*/ +#include "tvariant.h" #include "hash.h" -#include "hashutil.h" +#include "hashfunc.h" #include "os.h" #include "shash.h" +#include "taos.h" +#include "taosdef.h" #include "tstoken.h" #include "ttokendef.h" -#include "taosdef.h" #include "tutil.h" -#include "tvariant.h" -#include "taosdef.h" -#include "taos.h" // todo support scientific expression number and oct number void tVariantCreate(tVariant *pVar, SSQLToken *token) { tVariantCreateFromString(pVar, token->z, token->n, token->type); } diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 7ce3b287fb7699fbaa4a1ef595869059247fc1be..c6f527a7d2851d71b9fe9d296a5ae571bac364b6 100755 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -360,8 +360,8 @@ void rpcSendRequest(void *shandle, SRpcIpSet *pIpSet, SRpcMsg *pMsg) { // for TDengine, all the query, show commands shall have TCP connection char type = pMsg->msgType; if (type == TSDB_MSG_TYPE_QUERY || type == TSDB_MSG_TYPE_RETRIEVE || - type == TSDB_MSG_TYPE_STABLE_META || type == TSDB_MSG_TYPE_MULTI_TABLE_META || - type == TSDB_MSG_TYPE_SHOW ) + type == TSDB_MSG_TYPE_CM_STABLE_META || type == TSDB_MSG_TYPE_CM_TABLES_META || + type == TSDB_MSG_TYPE_CM_SHOW ) pContext->connType = RPC_CONN_TCPC; rpcSendReqToServer(pRpc, pContext); @@ -441,15 +441,16 @@ void rpcSendRedirectRsp(void *thandle, SRpcIpSet *pIpSet) { return; } -void rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo) { +int rpcGetConnInfo(void *thandle, SRpcConnInfo *pInfo) { SRpcConn *pConn = (SRpcConn *)thandle; + if (pConn->user[0] == 0) return -1; pInfo->clientIp = pConn->peerIp; pInfo->clientPort = pConn->peerPort; pInfo->serverIp = pConn->destIp; - assert(pConn->user[0]); strcpy(pInfo->user, pConn->user); + return 0; } static void rpcFreeMsg(void *msg) { @@ -494,35 +495,35 @@ static void rpcCloseConn(void *thandle) { SRpcConn *pConn = (SRpcConn *)thandle; SRpcInfo *pRpc = pConn->pRpc; - rpcLockConn(pConn); + if (pConn->user[0] == 0) return; - if (pConn->user[0]) { - pConn->user[0] = 
0; - if (taosCloseConn[pConn->connType]) (*taosCloseConn[pConn->connType])(pConn->chandle); - - taosTmrStopA(&pConn->pTimer); - taosTmrStopA(&pConn->pIdleTimer); - - if ( pRpc->connType == TAOS_CONN_SERVER) { - char hashstr[40] = {0}; - sprintf(hashstr, "%x:%x:%x:%d", pConn->peerIp, pConn->linkUid, pConn->peerId, pConn->connType); - taosDeleteStrHash(pRpc->hash, hashstr); - rpcFreeMsg(pConn->pRspMsg); // it may have a response msg saved, but not request msg - pConn->pRspMsg = NULL; - pConn->inType = 0; - pConn->inTranId = 0; - } else { - pConn->outType = 0; - pConn->outTranId = 0; - pConn->pReqMsg = NULL; - } + rpcLockConn(pConn); - taosFreeId(pRpc->idPool, pConn->sid); - pConn->pContext = NULL; + pConn->user[0] = 0; + if (taosCloseConn[pConn->connType]) (*taosCloseConn[pConn->connType])(pConn->chandle); - tTrace("%s %p, rpc connection is closed", pRpc->label, pConn); + taosTmrStopA(&pConn->pTimer); + taosTmrStopA(&pConn->pIdleTimer); + + if ( pRpc->connType == TAOS_CONN_SERVER) { + char hashstr[40] = {0}; + sprintf(hashstr, "%x:%x:%x:%d", pConn->peerIp, pConn->linkUid, pConn->peerId, pConn->connType); + taosDeleteStrHash(pRpc->hash, hashstr); + rpcFreeMsg(pConn->pRspMsg); // it may have a response msg saved, but not request msg + pConn->pRspMsg = NULL; + pConn->inType = 0; + pConn->inTranId = 0; + } else { + pConn->outType = 0; + pConn->outTranId = 0; + pConn->pReqMsg = NULL; } + taosFreeId(pRpc->idPool, pConn->sid); + pConn->pContext = NULL; + + tTrace("%s %p, rpc connection is closed", pRpc->label, pConn); + rpcUnlockConn(pConn); } @@ -814,7 +815,7 @@ static void *rpcProcessMsgFromPeer(SRecvInfo *pRecv) { terrno = 0; pConn = rpcProcessMsgHead(pRpc, pRecv); - if (pHead->msgType < TSDB_MSG_TYPE_HEARTBEAT || (rpcDebugFlag & 16)) { + if (pHead->msgType < TSDB_MSG_TYPE_CM_HEARTBEAT || (rpcDebugFlag & 16)) { tTrace("%s %p, %s received from 0x%x:%hu, parse code:%x len:%d sig:0x%08x:0x%08x:%d", pRpc->label, pConn, taosMsg[pHead->msgType], pRecv->ip, pRecv->port, terrno, 
pRecv->msgLen, pHead->sourceId, pHead->destId, pHead->tranId, pHead->port); @@ -983,12 +984,12 @@ static void rpcSendMsgToPeer(SRpcConn *pConn, void *msg, int msgLen) { msgLen = rpcAddAuthPart(pConn, msg, msgLen); if ( rpcIsReq(pHead->msgType)) { - if (pHead->msgType < TSDB_MSG_TYPE_HEARTBEAT || (rpcDebugFlag & 16)) + if (pHead->msgType < TSDB_MSG_TYPE_CM_HEARTBEAT || (rpcDebugFlag & 16)) tTrace("%s %p, %s is sent to %s:%hu, len:%d sig:0x%08x:0x%08x:%d", pRpc->label, pConn, taosMsg[pHead->msgType], pConn->peerIpstr, pConn->peerPort, msgLen, pHead->sourceId, pHead->destId, pHead->tranId); } else { - if (pHead->msgType < TSDB_MSG_TYPE_HEARTBEAT || (rpcDebugFlag & 16)) + if (pHead->msgType < TSDB_MSG_TYPE_CM_HEARTBEAT || (rpcDebugFlag & 16)) tTrace( "%s %p, %s is sent to %s:%hu, code:%u len:%d sig:0x%08x:0x%08x:%d", pRpc->label, pConn, taosMsg[pHead->msgType], pConn->peerIpstr, pConn->peerPort, (uint8_t)pHead->content[0], msgLen, pHead->sourceId, pHead->destId, pHead->tranId); diff --git a/src/util/inc/hash.h b/src/util/inc/hash.h index 3d60abe9c69769541bc26d70c7d0b787ed2252d6..1bbc8dcf5c229112b421de9a4b5ef4f056a39438 100644 --- a/src/util/inc/hash.h +++ b/src/util/inc/hash.h @@ -20,20 +20,21 @@ extern "C" { #endif -#include "hashutil.h" +#include "hashfunc.h" #define HASH_MAX_CAPACITY (1024 * 1024 * 16) -#define HASH_VALUE_IN_TRASH (-1) #define HASH_DEFAULT_LOAD_FACTOR (0.75) #define HASH_INDEX(v, c) ((v) & ((c)-1)) +typedef void (*_hash_free_fn_t)(void *param); + typedef struct SHashNode { - char *key; // null-terminated string + char *key; union { struct SHashNode * prev; struct SHashEntry *prev1; }; - + struct SHashNode *next; uint32_t hashVal; // the hash value of key, if hashVal == HASH_VALUE_IN_TRASH, this node is moved to trash uint32_t keyLen; // length of the key @@ -45,32 +46,121 @@ typedef struct SHashEntry { uint32_t num; } SHashEntry; -typedef struct HashObj { - SHashEntry **hashList; - uint32_t capacity; // number of slots - int size; // number of 
elements in hash table - _hash_fn_t hashFp; // hash function - bool multithreadSafe; // enable lock or not +typedef struct SHashObj { + SHashEntry ** hashList; + size_t capacity; // number of slots + size_t size; // number of elements in hash table + _hash_fn_t hashFp; // hash function + _hash_free_fn_t freeFp; // hash node free callback function -#if defined LINUX - pthread_rwlock_t lock; +#if defined(LINUX) + pthread_rwlock_t *lock; #else - pthread_mutex_t lock; + pthread_mutex_t *lock; #endif +} SHashObj; + +typedef struct SHashMutableIterator { + SHashObj * pHashObj; + int32_t entryIndex; + SHashNode *pCur; + SHashNode *pNext; // current node can be deleted for mutable iterator, so keep the next one before return current + int32_t num; // already check number of elements in hash table +} SHashMutableIterator; + +/** + * init the hash table + * + * @param capacity initial capacity of the hash table + * @param fn hash function to generate the hash value + * @param threadsafe thread safe or not + * @return + */ +SHashObj *taosHashInit(size_t capacity, _hash_fn_t fn, bool threadsafe); -} HashObj; +/** + * return the size of hash table + * @param pHashObj + * @return + */ +size_t taosHashGetSize(const SHashObj *pHashObj); + +/** + * put element into hash table, if the element with the same key exists, update it + * @param pHashObj + * @param key + * @param keyLen + * @param data + * @param size + * @return + */ +int32_t taosHashPut(SHashObj *pHashObj, const char *key, size_t keyLen, void *data, size_t size); -void *taosInitHashTable(uint32_t capacity, _hash_fn_t fn, bool multithreadSafe); -void taosDeleteFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen); +/** + * return the payload data with the specified key + * + * @param pHashObj + * @param key + * @param keyLen + * @return + */ +void *taosHashGet(SHashObj *pHashObj, const char *key, size_t keyLen); -int32_t taosAddToHashTable(HashObj *pObj, const char *key, uint32_t keyLen, void *data, uint32_t 
size); -int32_t taosNumElemsInHashTable(HashObj *pObj); +/** + * remove item with the specified key + * @param pHashObj + * @param key + * @param keyLen + */ +void taosHashRemove(SHashObj *pHashObj, const char *key, size_t keyLen); -char *taosGetDataFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen); +/** + * clean up hash table + * @param handle + */ +void taosHashCleanup(SHashObj *pHashObj); -void taosCleanUpHashTable(void *handle); +/** + * Set the free callback function + * This function if set will be invoked right before freeing each hash node + * @param pHashObj + */ +void taosHashSetFreecb(SHashObj *pHashObj, _hash_free_fn_t freeFp); -int32_t taosGetHashMaxOverflowLength(HashObj *pObj); +/** + * + * @param pHashObj + * @return + */ +SHashMutableIterator* taosHashCreateIter(SHashObj *pHashObj); + +/** + * + * @param iter + * @return + */ +bool taosHashIterNext(SHashMutableIterator *iter); + +/** + * + * @param iter + * @return + */ +void *taosHashIterGet(SHashMutableIterator *iter); + +/** + * + * @param iter + * @return + */ +void* taosHashDestroyIter(SHashMutableIterator* iter); + +/** + * + * @param pHashObj + * @return + */ +int32_t taosHashGetMaxOverflowLinkLength(const SHashObj *pHashObj); #ifdef __cplusplus } diff --git a/src/util/inc/hashutil.h b/src/util/inc/hashfunc.h similarity index 100% rename from src/util/inc/hashutil.h rename to src/util/inc/hashfunc.h diff --git a/src/util/inc/ihash.h b/src/util/inc/ihash.h index 9623f95bbd8b65ad4b2dbcae2f26112eb6beb1e1..f283abe737b62cba17a246e1942c22c4a0b90123 100644 --- a/src/util/inc/ihash.h +++ b/src/util/inc/ihash.h @@ -34,6 +34,12 @@ char *taosAddIntHash(void *handle, uint64_t key, char *pData); int32_t taosHashInt(void *handle, uint64_t key); +void taosCleanUpIntHashWithFp(void *handle, void (*fp)(char *)); + +void taosVisitIntHashWithFp(void *handle, void (*fp)(char *, void *), void *param); + +int32_t taosGetIntHashSize(void *handle); + #ifdef __cplusplus } #endif diff --git 
a/src/util/inc/sskiplist.h b/src/util/inc/sskiplist.h deleted file mode 100644 index f2ae2efc54342a1aa0fd1298b8868132d65e246d..0000000000000000000000000000000000000000 --- a/src/util/inc/sskiplist.h +++ /dev/null @@ -1,207 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ -#if 0 - -#ifndef TBASE_TSKIPLIST_H -#define TBASE_TSKIPLIST_H - -#ifdef __cplusplus -extern "C" { -#endif - -#define MAX_SKIP_LIST_LEVEL 20 - -#include -#include -#include - -#include "os.h" -#include "taosdef.h" - -/* - * key of each node - * todo move to as the global structure in all search codes... - */ - -const static size_t SKIP_LIST_STR_KEY_LENGTH_THRESHOLD = 15; -typedef tVariant tSkipListKey; - -typedef enum tSkipListPointQueryType { - INCLUDE_POINT_QUERY, - EXCLUDE_POINT_QUERY, -} tSkipListPointQueryType; - -typedef struct tSkipListNode { - uint16_t nLevel; - char * pData; - - struct tSkipListNode **pForward; - struct tSkipListNode **pBackward; - - tSkipListKey key; -} tSkipListNode; - -/* - * @version 0.2 - * @date 2017/11/12 - * the simple version of SkipList. - * for multi-thread safe purpose, we employ pthread_rwlock_t to guarantee to generate - * deterministic result. Later, we will remove the lock in SkipList to further - * enhance the performance. 
In this case, one should use the concurrent skip list (by - * using michael-scott algorithm) instead of this simple version in a multi-thread - * environment, to achieve higher performance of read/write operations. - * - * Note: Duplicated primary key situation. - * In case of duplicated primary key, two ways can be employed to handle this situation: - * 1. add as normal insertion with out special process. - * 2. add an overflow pointer at each list node, all nodes with the same key will be added - * in the overflow pointer. In this case, the total steps of each search will be reduced significantly. - * Currently, we implement the skip list in a line with the first means, maybe refactor it soon. - * - * Memory consumption: the memory alignment causes many memory wasted. So, employ a memory - * pool will significantly reduce the total memory consumption, as well as the calloc/malloc operation costs. - * - * 3. use the iterator pattern to refactor all routines to make it more clean - */ - -// state struct, record following information: -// number of links in each level. 
-// avg search steps, for latest 1000 queries -// avg search rsp time, for latest 1000 queries -// total memory size -typedef struct tSkipListState { - // in bytes, sizeof(tSkipList)+sizeof(tSkipListNode)*tSkipList->nSize - uint64_t nTotalMemSize; - uint64_t nLevelNodeCnt[MAX_SKIP_LIST_LEVEL]; - uint64_t queryCount; // total query count - - /* - * only record latest 1000 queries - * when the value==1000, = 0, - * nTotalStepsForQueries = 0, - * nTotalElapsedTimeForQueries = 0 - */ - uint64_t nRecQueries; - uint16_t nTotalStepsForQueries; - uint64_t nTotalElapsedTimeForQueries; - - uint16_t nInsertObjs; - uint16_t nTotalStepsForInsert; - uint64_t nTotalElapsedTimeForInsert; -} tSkipListState; - -typedef struct tSkipList { - tSkipListNode pHead; - uint64_t nSize; - uint16_t nMaxLevel; - uint16_t nLevel; - uint16_t keyType; - uint16_t nMaxKeyLen; - - __compar_fn_t comparator; - pthread_rwlock_t lock; // will be removed soon - tSkipListState state; // skiplist state -} tSkipList; - -/* - * iterate the skiplist - * this will cause the multi-thread problem, when the skiplist is destroyed, the iterate may - * continue iterating the skiplist, so add the reference count for skiplist - * TODO add the ref for skiplist when one iterator is created - */ -typedef struct SSkipListIterator { - tSkipList * pSkipList; - tSkipListNode *cur; - int64_t num; -} SSkipListIterator; - -/* - * query condition structure to denote the range query - * todo merge the point query cond with range query condition - */ -typedef struct tSKipListQueryCond { - // when the upper bounding == lower bounding, it is a point query - tSkipListKey lowerBnd; - tSkipListKey upperBnd; - int32_t lowerBndRelOptr; // relation operator to denote if lower bound is - int32_t upperBndRelOptr; // included or not -} tSKipListQueryCond; - -tSkipList *SSkipListCreate(int16_t nMaxLevel, int16_t keyType, int16_t nMaxKeyLen); - -void *SSkipListDestroy(tSkipList *pSkipList); - -// create skip list key -tSkipListKey 
SSkipListCreateKey(int32_t type, char *val, size_t keyLength); - -// destroy skip list key -void tSkipListDestroyKey(tSkipListKey *pKey); - -// put data into skiplist -tSkipListNode *SSkipListPut(tSkipList *pSkipList, void *pData, tSkipListKey *pKey, int32_t insertIdenticalKey); - -/* - * get only *one* node of which key is equalled to pKey, even there are more - * than one nodes are of the same key - */ -tSkipListNode *tSkipListGetOne(tSkipList *pSkipList, tSkipListKey *pKey); - -/* - * get all data with the same keys - */ -int32_t tSkipListGets(tSkipList *pSkipList, tSkipListKey *pKey, tSkipListNode ***pRes); - -int32_t tSkipListIterateList(tSkipList *pSkipList, tSkipListNode ***pRes, bool (*fp)(tSkipListNode *, void *), - void *param); - -/* - * remove only one node of the pKey value. - * If more than one node has the same value, any one will be removed - * - * @Return - * true: one node has been removed - * false: no node has been removed - */ -bool tSkipListRemove(tSkipList *pSkipList, tSkipListKey *pKey); - -/* - * remove the specified node in parameters - */ -void tSkipListRemoveNode(tSkipList *pSkipList, tSkipListNode *pNode); - -// for debug purpose only -void SSkipListPrint(tSkipList *pSkipList, int16_t nlevel); - -/* - * range query & single point query function - */ -int32_t tSkipListQuery(tSkipList *pSkipList, tSKipListQueryCond *pQueryCond, tSkipListNode ***pResult); - -/* - * include/exclude point query - */ -int32_t tSkipListPointQuery(tSkipList *pSkipList, tSkipListKey *pKey, int32_t numOfKey, tSkipListPointQueryType type, - tSkipListNode ***pResult); - -int32_t tSkipListIteratorReset(tSkipList *pSkipList, SSkipListIterator *iter); -bool tSkipListIteratorNext(SSkipListIterator *iter); -tSkipListNode *tSkipListIteratorGet(SSkipListIterator *iter); - -#ifdef __cplusplus -} -#endif - -#endif // TBASE_TSKIPLIST_H -#endif \ No newline at end of file diff --git a/src/util/inc/tarray.h b/src/util/inc/tarray.h index 
244849a1ed47c43654ea437e85e4b312a24d956e..55bdc849adf4bf6d8941f61c669f73cdcf2880fd 100644 --- a/src/util/inc/tarray.h +++ b/src/util/inc/tarray.h @@ -76,7 +76,7 @@ void* taosArrayGetP(SArray* pArray, size_t index); * @param pArray * @return */ -size_t taosArrayGetSize(SArray* pArray); +size_t taosArrayGetSize(const SArray* pArray); /** * insert data into array diff --git a/src/util/inc/tcache.h b/src/util/inc/tcache.h new file mode 100644 index 0000000000000000000000000000000000000000..36635fff30f32f3ad3a993e2e0c5139f35eec1a6 --- /dev/null +++ b/src/util/inc/tcache.h @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#ifndef TDENGINE_TCACHE_H +#define TDENGINE_TCACHE_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "os.h" +#include "tref.h" +#include "hash.h" + +typedef struct SCacheStatis { + int64_t missCount; + int64_t hitCount; + int64_t totalAccess; + int64_t refreshCount; + int32_t numOfCollision; +} SCacheStatis; + +typedef struct SCacheDataNode { + uint64_t addedTime; // the added time when this element is added or updated into cache + uint64_t expiredTime; // expiredTime expiredTime when this element should be remove from cache + uint64_t signature; + uint32_t size; // allocated size for current SCacheDataNode + uint16_t keySize : 15; + bool inTrash : 1; // denote if it is in trash or not + T_REF_DECLARE() + char *key; + char data[]; +} SCacheDataNode; + +typedef struct STrashElem { + struct STrashElem *prev; + struct STrashElem *next; + SCacheDataNode * pData; +} STrashElem; + +typedef struct { + int64_t totalSize; // total allocated buffer in this hash table, SCacheObj is not included. + int64_t refreshTime; + + /* + * to accommodate the old datanode which has the same key value of new one in hashList + * when an new node is put into cache, if an existed one with the same key: + * 1. if the old one does not be referenced, update it. + * 2. otherwise, move the old one to pTrash, addedTime the new one. + * + * when the node in pTrash does not be referenced, it will be release at the expired expiredTime + */ + STrashElem * pTrash; + void * tmrCtrl; + void * pTimer; + SCacheStatis statistics; + SHashObj * pHashTable; + int numOfElemsInTrash; // number of element in trash + int16_t deleting; // set the deleting flag to stop refreshing ASAP. 
+ +#if defined(LINUX) + pthread_rwlock_t lock; +#else + pthread_mutex_t lock; +#endif + +} SCacheObj; + +/** + * + * @param maxSessions maximum slots available for hash elements + * @param tmrCtrl timer ctrl + * @param refreshTime refresh operation interval time, the maximum survival time when one element is expired and + * not referenced by other objects + * @return + */ +SCacheObj *taosCacheInit(void *tmrCtrl, int64_t refreshTimeInSeconds); + +/** + * add data into cache + * + * @param handle cache object + * @param key key + * @param pData cached data + * @param dataSize data size + * @param keepTime survival time in second + * @return cached element + */ +void *taosCachePut(SCacheObj *pCacheObj, char *key, void *pData, size_t dataSize, int keepTimeInSeconds); + +/** + * get data from cache + * @param pCacheObj cache object + * @param key key + * @return cached data or NULL + */ +void *taosCacheAcquireByName(SCacheObj *pCacheObj, const char *key); + +/** + * Add one reference count for the exist data, and assign this data for a new owner. + * The new owner needs to invoke the taosCacheRelease when it does not need this data anymore. + * This procedure is a faster version of taosCacheAcquireByName function, which avoids the sideeffect of the problem of + * the data is moved to trash, and taosCacheAcquireByName will fail to retrieve it again. + * + * @param handle + * @param data + * @return + */ +void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data); + +/** + * transfer the ownership of data in cache to another object without increasing reference count. + * @param handle + * @param data + * @return + */ +void *taosCacheTransfer(SCacheObj *pCacheObj, void **data); + +/** + * remove data in cache, the data will not be removed immediately. 
+ * if it is referenced by other object, it will be remain in cache + * @param handle cache object + * @param data not the key, actually referenced data + * @param _remove force model, reduce the ref count and move the data into + * pTrash + */ +void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove); + +/** + * move all data node into trash, clear node in trash can if it is not referenced by any clients + * @param handle + */ +void taosCacheEmpty(SCacheObj *pCacheObj); + +/** + * release all allocated memory and destroy the cache object. + * + * This function only set the deleting flag, and the specific work of clean up cache is delegated to + * taosCacheRefresh function, which will executed every SCacheObj->refreshTime sec. + * + * If the value of SCacheObj->refreshTime is too large, the taosCacheRefresh function may not be invoked + * before the main thread terminated, in which case all allocated resources are simply recycled by OS. + * + * @param handle + */ +void taosCacheCleanup(SCacheObj *pCacheObj); + +#ifdef __cplusplus +} +#endif + +#endif // TDENGINE_TCACHE_H diff --git a/src/util/inc/tglobalcfg.h b/src/util/inc/tglobalcfg.h index bbb824cd3d9100f44d9abf7d5085a63e72b04eff..993992ffcbf9a33b00761dd20141224f912db958 100644 --- a/src/util/inc/tglobalcfg.h +++ b/src/util/inc/tglobalcfg.h @@ -50,7 +50,9 @@ extern int tscEmbedded; extern int64_t tsMsPerDay[2]; extern char configDir[]; -extern char tsDirectory[]; +extern char tsVnodeDir[]; +extern char tsDnodeDir[]; +extern char tsMnodeDir[]; extern char dataDir[]; extern char logDir[]; extern char scriptDir[]; @@ -58,9 +60,10 @@ extern char osName[]; extern char tsMasterIp[]; extern char tsSecondIp[]; -extern uint16_t tsMgmtDnodePort; -extern uint16_t tsMgmtShellPort; -extern uint16_t tsVnodeShellPort; +extern uint16_t tsMnodeDnodePort; +extern uint16_t tsMnodeShellPort; +extern uint16_t tsDnodeShellPort; +extern uint16_t tsDnodeMnodePort; extern uint16_t tsVnodeVnodePort; extern uint16_t 
tsMgmtMgmtPort; extern uint16_t tsMgmtSyncPort; @@ -262,9 +265,6 @@ SGlobalConfig *tsGetConfigOption(const char *option); #define NEEDTO_COMPRESSS_MSG(size) (tsCompressMsgSize != -1 && (size) > tsCompressMsgSize) -extern char tsMgmtDirectory[]; - - #ifdef __cplusplus } #endif diff --git a/src/util/inc/tlist.h b/src/util/inc/tlist.h new file mode 100644 index 0000000000000000000000000000000000000000..9e4dfe45801ed448e6c55a6e69c87b22e9e77b88 --- /dev/null +++ b/src/util/inc/tlist.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ +#ifndef _TD_LIST_ +#define _TD_LIST_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { TD_LIST_FORWARD, TD_LIST_BACKWARD } TD_LIST_DIRECTION_T; + +typedef struct _list_node { + struct _list_node *next; + struct _list_node *prev; + char data[]; +} SListNode; + +typedef struct { + struct _list_node *head; + struct _list_node *tail; + int numOfEles; + int eleSize; +} SList; + +typedef struct { + SListNode * next; + TD_LIST_DIRECTION_T direction; +} SListIter; + +#define listHead(l) (l)->head +#define listTail(l) (l)->tail +#define listNEles(l) (l)->numOfEles +#define listEleSize(l) (l)->eleSize +#define isListEmpty(l) ((l)->numOfEles == 0) +#define listNodeFree(n) free(n); + +SList * tdListNew(int eleSize); +void tdListFree(SList *list); +void tdListEmpty(SList *list); +void tdListPrependNode(SList *list, SListNode *node); +void tdListAppendNode(SList *list, SListNode *node); +int tdListPrepend(SList *list, void *data); +int tdListAppend(SList *list, void *data); +SListNode *tdListPopHead(SList *list); +SListNode *tdListPopTail(SList *list); +SListNode *tdListPopNode(SList *list, SListNode *node); +void tdListMove(SList *src, SList *dst); + +void tdListNodeGetData(SList *list, SListNode *node, void *target); +void tdListInitIter(SList *list, SListIter *pIter, TD_LIST_DIRECTION_T direction); +SListNode *tdListNext(SListIter *pIter); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/src/util/inc/tref.h b/src/util/inc/tref.h new file mode 100644 index 0000000000000000000000000000000000000000..9483c1cc35e6d01c7b49c993d34f0c03ad950fdd --- /dev/null +++ b/src/util/inc/tref.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef TDENGINE_TREF_H +#define TDENGINE_TREF_H + +#include "os.h" + +typedef void (*_ref_fn_t)(const void* pObj); + +#define T_REF_DECLARE() \ + struct { \ + int16_t val; \ + } _ref; + +#define T_REF_REGISTER_FUNC(s, e) \ + struct { \ + _ref_fn_t start; \ + _ref_fn_t end; \ + } _ref_func = {.begin = (s), .end = (e)}; + +#define T_REF_INC(x) (atomic_add_fetch_16(&((x)->_ref.val), 1)); + +#define T_REF_INC_WITH_CB(x, p) \ + do { \ + int32_t v = atomic_add_fetch_32(&((x)->_ref.val), 1); \ + if (v == 1 && (p)->_ref_func.begin != NULL) { \ + (p)->_ref_func.begin((x)); \ + } \ + } while (0) + +#define T_REF_DEC(x) (atomic_sub_fetch_16(&((x)->_ref.val), 1)); + +#define T_REF_DEC_WITH_CB(x, p) \ + do { \ + int32_t v = atomic_sub_fetch_16(&((x)->_ref.val), 1); \ + if (v == 0 && (p)->_ref_func.end != NULL) { \ + (p)->_ref_func.end((x)); \ + } \ + } while (0) + +#define T_REF_VAL_CHECK(x) assert((x)->_ref.val >= 0); + +#define T_REF_VAL_GET(x) (x)->_ref.val + +#endif // TDENGINE_TREF_H diff --git a/src/util/inc/tskiplist.h b/src/util/inc/tskiplist.h index 2ebb9bf5f2cd48f67f4c044a2764d0b3ff449c40..0ec1d0eab52298913029f455e1566d9cf5d6630b 100644 --- a/src/util/inc/tskiplist.h +++ b/src/util/inc/tskiplist.h @@ -185,15 +185,39 @@ SSkipListNode *tSkipListPut(SSkipList *pSkipList, SSkipListNode *pNode); SArray* tSkipListGet(SSkipList *pSkipList, SSkipListKey pKey, int16_t keyType); /** - * + * get the size of skip list * @param pSkipList - * @param pRes - * @param fp - * @param param * @return */ -int32_t tSkipListIterateList(SSkipList *pSkipList, SSkipListNode ***pRes, bool (*fp)(SSkipListNode *, void *), - void *param); +size_t tSkipListGetSize(const 
SSkipList* pSkipList); + +/** + * create skiplist iterator + * @param pSkipList + * @return + */ +SSkipListIterator* tSkipListCreateIter(SSkipList *pSkipList); + +/** + * forward the skip list iterator + * @param iter + * @return + */ +bool tSkipListIterNext(SSkipListIterator *iter); + +/** + * get the element of skip list node + * @param iter + * @return + */ +SSkipListNode *tSkipListIterGet(SSkipListIterator *iter); + +/** + * destroy the skip list node + * @param iter + * @return + */ +void* tSkipListDestroyIter(SSkipListIterator* iter); /* * remove only one node of the pKey value. @@ -210,9 +234,6 @@ bool tSkipListRemove(SSkipList *pSkipList, SSkipListKey *pKey); */ void tSkipListRemoveNode(SSkipList *pSkipList, SSkipListNode *pNode); -int32_t tSkipListIteratorReset(SSkipList *pSkipList, SSkipListIterator *iter); -bool tSkipListIteratorNext(SSkipListIterator *iter); -SSkipListNode *tSkipListIteratorGet(SSkipListIterator *iter); #ifdef __cplusplus } diff --git a/src/util/inc/tstatus.h b/src/util/inc/tstatus.h index 74685737ce05041fd49b0a92f99f5bf65c688140..3b6dfc283a93b8477d185468dbd6d27f604f1747 100644 --- a/src/util/inc/tstatus.h +++ b/src/util/inc/tstatus.h @@ -94,17 +94,17 @@ enum TSDB_TABLE_STATUS { TSDB_METER_STATE_DROPPED = 0x18, }; -const char* taosGetVgroupStatusStr(int32_t vgroupStatus); -const char* taosGetDbStatusStr(int32_t dbStatus); -const char* taosGetVnodeStatusStr(int32_t vnodeStatus); -const char* taosGetVnodeSyncStatusStr(int32_t vnodeSyncStatus); -const char* taosGetVnodeDropStatusStr(int32_t dropping); -const char* taosGetDnodeStatusStr(int32_t dnodeStatus); -const char* taosGetDnodeLbStatusStr(int32_t dnodeBalanceStatus); -const char* taosGetVgroupLbStatusStr(int32_t vglbStatus); -const char* taosGetVnodeStreamStatusStr(int32_t vnodeStreamStatus); - -const char* taosGetTableStatusStr(int32_t tableStatus); +char* taosGetVgroupStatusStr(int32_t vgroupStatus); +char* taosGetDbStatusStr(int32_t dbStatus); +char* taosGetVnodeStatusStr(int32_t 
vnodeStatus); +char* taosGetVnodeSyncStatusStr(int32_t vnodeSyncStatus); +char* taosGetVnodeDropStatusStr(int32_t dropping); +char* taosGetDnodeStatusStr(int32_t dnodeStatus); +char* taosGetDnodeLbStatusStr(int32_t dnodeBalanceStatus); +char* taosGetVgroupLbStatusStr(int32_t vglbStatus); +char* taosGetVnodeStreamStatusStr(int32_t vnodeStreamStatus); +char* taosGetTableStatusStr(int32_t tableStatus); +char *taosGetShowTypeStr(int32_t showType); #ifdef __cplusplus } diff --git a/src/util/inc/tutil.h b/src/util/inc/tutil.h index cb311ed19361bad442e312732d8e499893e1556e..df97dde5ac7b1440b2cdf2abab9f0243f84ddee5 100644 --- a/src/util/inc/tutil.h +++ b/src/util/inc/tutil.h @@ -137,6 +137,13 @@ int64_t str2int64(char *str); int32_t taosFileRename(char *fullPath, char *suffix, char delimiter, char **dstPath); +/** + * + * @param fileNamePattern + * @param dstPath + */ +void getTmpfilePath(const char *fileNamePattern, char *dstPath); + int32_t taosInitTimer(void (*callback)(int), int32_t ms); bool taosMbsToUcs4(char *mbs, int32_t mbs_len, char *ucs4, int32_t ucs4_max_len); diff --git a/src/util/src/hash.c b/src/util/src/hash.c index 2f643f17fa7c866a224bfe4b2ed969c485123800..9cad14e8c707802768a84c48a236cfc76b0c3a1e 100644 --- a/src/util/src/hash.c +++ b/src/util/src/hash.c @@ -21,7 +21,11 @@ #include "tutil.h" static FORCE_INLINE void __wr_lock(void *lock) { -#if defined LINUX + if (lock == NULL) { + return; + } + +#if defined(LINUX) pthread_rwlock_wrlock(lock); #else pthread_mutex_lock(lock); @@ -29,7 +33,11 @@ static FORCE_INLINE void __wr_lock(void *lock) { } static FORCE_INLINE void __rd_lock(void *lock) { -#if defined LINUX + if (lock == NULL) { + return; + } + +#if defined(LINUX) pthread_rwlock_rdlock(lock); #else pthread_mutex_lock(lock); @@ -37,7 +45,11 @@ static FORCE_INLINE void __rd_lock(void *lock) { } static FORCE_INLINE void __unlock(void *lock) { -#if defined LINUX + if (lock == NULL) { + return; + } + +#if defined(LINUX) pthread_rwlock_unlock(lock); #else 
pthread_mutex_unlock(lock); @@ -45,7 +57,11 @@ static FORCE_INLINE void __unlock(void *lock) { } static FORCE_INLINE int32_t __lock_init(void *lock) { -#if defined LINUX + if (lock == NULL) { + return 0; + } + +#if defined(LINUX) return pthread_rwlock_init(lock, NULL); #else return pthread_mutex_init(lock, NULL); @@ -53,7 +69,11 @@ static FORCE_INLINE int32_t __lock_init(void *lock) { } static FORCE_INLINE void __lock_destroy(void *lock) { -#if defined LINUX + if (lock == NULL) { + return; + } + +#if defined(LINUX) pthread_rwlock_destroy(lock); #else pthread_mutex_destroy(lock); @@ -68,21 +88,12 @@ static FORCE_INLINE int32_t taosHashCapacity(int32_t length) { return i; } -/** - * hash key function - * - * @param key key string - * @param len length of key - * @return hash value - */ -static FORCE_INLINE uint32_t taosHashKey(const char *key, uint32_t len) { return MurmurHash3_32(key, len); } - /** * inplace update node in hash table - * @param pObj hash table object + * @param pHashObj hash table object * @param pNode data node */ -static void doUpdateHashTable(HashObj *pObj, SHashNode *pNode) { +static void doUpdateHashTable(SHashObj *pHashObj, SHashNode *pNode) { if (pNode->prev1) { pNode->prev1->next = pNode; } @@ -96,16 +107,16 @@ static void doUpdateHashTable(HashObj *pObj, SHashNode *pNode) { /** * get SHashNode from hashlist, nodes from trash are not included. 
- * @param pObj Cache objection + * @param pHashObj Cache objection * @param key key for hash * @param keyLen key length * @return */ -static SHashNode *doGetNodeFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen, uint32_t *hashVal) { - uint32_t hash = (*pObj->hashFp)(key, keyLen); +static SHashNode *doGetNodeFromHashTable(SHashObj *pHashObj, const char *key, uint32_t keyLen, uint32_t *hashVal) { + uint32_t hash = (*pHashObj->hashFp)(key, keyLen); - int32_t slot = HASH_INDEX(hash, pObj->capacity); - SHashEntry *pEntry = pObj->hashList[slot]; + int32_t slot = HASH_INDEX(hash, pHashObj->capacity); + SHashEntry *pEntry = pHashObj->hashList[slot]; SHashNode *pNode = pEntry->next; while (pNode) { @@ -117,7 +128,7 @@ static SHashNode *doGetNodeFromHashTable(HashObj *pObj, const char *key, uint32_ } if (pNode) { - assert(HASH_INDEX(pNode->hashVal, pObj->capacity) == slot); + assert(HASH_INDEX(pNode->hashVal, pHashObj->capacity) == slot); } // return the calculated hash value, to avoid calculating it again in other functions @@ -131,10 +142,10 @@ static SHashNode *doGetNodeFromHashTable(HashObj *pObj, const char *key, uint32_ /** * resize the hash list if the threshold is reached * - * @param pObj + * @param pHashObj */ -static void taosHashTableResize(HashObj *pObj) { - if (pObj->size < pObj->capacity * HASH_DEFAULT_LOAD_FACTOR) { +static void taosHashTableResize(SHashObj *pHashObj) { + if (pHashObj->size < pHashObj->capacity * HASH_DEFAULT_LOAD_FACTOR) { return; } @@ -142,38 +153,38 @@ static void taosHashTableResize(HashObj *pObj) { SHashNode *pNode = NULL; SHashNode *pNext = NULL; - int32_t newSize = pObj->capacity << 1U; + int32_t newSize = pHashObj->capacity << 1U; if (newSize > HASH_MAX_CAPACITY) { - pTrace("current capacity:%d, maximum capacity:%d, no resize applied due to limitation is reached", pObj->capacity, - HASH_MAX_CAPACITY); + pTrace("current capacity:%d, maximum capacity:%d, no resize applied due to limitation is reached", + pHashObj->capacity, 
HASH_MAX_CAPACITY); return; } int64_t st = taosGetTimestampUs(); - SHashEntry **pNewEntry = realloc(pObj->hashList, sizeof(SHashEntry*) * newSize); + SHashEntry **pNewEntry = realloc(pHashObj->hashList, sizeof(SHashEntry *) * newSize); if (pNewEntry == NULL) { - pTrace("cache resize failed due to out of memory, capacity remain:%d", pObj->capacity); + pTrace("cache resize failed due to out of memory, capacity remain:%d", pHashObj->capacity); return; } - pObj->hashList = pNewEntry; - for(int32_t i = pObj->capacity; i < newSize; ++i) { - pObj->hashList[i] = calloc(1, sizeof(SHashEntry)); + pHashObj->hashList = pNewEntry; + for (int32_t i = pHashObj->capacity; i < newSize; ++i) { + pHashObj->hashList[i] = calloc(1, sizeof(SHashEntry)); } - - pObj->capacity = newSize; - for (int32_t i = 0; i < pObj->capacity; ++i) { - SHashEntry *pEntry = pObj->hashList[i]; + pHashObj->capacity = newSize; + + for (int32_t i = 0; i < pHashObj->capacity; ++i) { + SHashEntry *pEntry = pHashObj->hashList[i]; pNode = pEntry->next; if (pNode != NULL) { assert(pNode->prev1 == pEntry && pEntry->num > 0); } - + while (pNode) { - int32_t j = HASH_INDEX(pNode->hashVal, pObj->capacity); + int32_t j = HASH_INDEX(pNode->hashVal, pHashObj->capacity); if (j == i) { // this key resides in the same slot, no need to relocate it pNode = pNode->next; } else { @@ -181,13 +192,13 @@ static void taosHashTableResize(HashObj *pObj) { // remove from current slot assert(pNode->prev1 != NULL); - - if (pNode->prev1 == pEntry) { // first node of the overflow linked list + + if (pNode->prev1 == pEntry) { // first node of the overflow linked list pEntry->next = pNode->next; } else { pNode->prev->next = pNode->next; } - + pEntry->num--; assert(pEntry->num >= 0); @@ -199,17 +210,17 @@ static void taosHashTableResize(HashObj *pObj) { pNode->next = NULL; pNode->prev1 = NULL; - SHashEntry *pNewIndexEntry = pObj->hashList[j]; + SHashEntry *pNewIndexEntry = pHashObj->hashList[j]; if (pNewIndexEntry->next != NULL) { 
assert(pNewIndexEntry->next->prev1 == pNewIndexEntry); - + pNewIndexEntry->next->prev = pNode; } - + pNode->next = pNewIndexEntry->next; pNode->prev1 = pNewIndexEntry; - + pNewIndexEntry->next = pNode; pNewIndexEntry->num++; @@ -221,8 +232,8 @@ static void taosHashTableResize(HashObj *pObj) { int64_t et = taosGetTimestampUs(); - pTrace("hash table resize completed, new capacity:%d, load factor:%f, elapsed time:%fms", pObj->capacity, - ((double)pObj->size) / pObj->capacity, (et - st) / 1000.0); + pTrace("hash table resize completed, new capacity:%d, load factor:%f, elapsed time:%fms", pHashObj->capacity, + ((double)pHashObj->size) / pHashObj->capacity, (et - st) / 1000.0); } /** @@ -230,43 +241,51 @@ static void taosHashTableResize(HashObj *pObj) { * @param fn hash function * @return */ -void *taosInitHashTable(uint32_t capacity, _hash_fn_t fn, bool multithreadSafe) { +SHashObj *taosHashInit(size_t capacity, _hash_fn_t fn, bool threadsafe) { if (capacity == 0 || fn == NULL) { return NULL; } - HashObj *pObj = (HashObj *)calloc(1, sizeof(HashObj)); - if (pObj == NULL) { + SHashObj *pHashObj = (SHashObj *)calloc(1, sizeof(SHashObj)); + if (pHashObj == NULL) { pError("failed to allocate memory, reason:%s", strerror(errno)); return NULL; } // the max slots is not defined by user - pObj->capacity = taosHashCapacity(capacity); - assert((pObj->capacity & (pObj->capacity - 1)) == 0); + pHashObj->capacity = taosHashCapacity(capacity); + assert((pHashObj->capacity & (pHashObj->capacity - 1)) == 0); - pObj->hashFp = fn; + pHashObj->hashFp = fn; - pObj->hashList = (SHashEntry **)calloc(pObj->capacity, sizeof(SHashEntry*)); - if (pObj->hashList == NULL) { - free(pObj); + pHashObj->hashList = (SHashEntry **)calloc(pHashObj->capacity, sizeof(SHashEntry *)); + if (pHashObj->hashList == NULL) { + free(pHashObj); pError("failed to allocate memory, reason:%s", strerror(errno)); return NULL; } - - for(int32_t i = 0; i < pObj->capacity; ++i) { - pObj->hashList[i] = calloc(1, 
sizeof(SHashEntry)); + + for (int32_t i = 0; i < pHashObj->capacity; ++i) { + pHashObj->hashList[i] = calloc(1, sizeof(SHashEntry)); } - if (multithreadSafe && (__lock_init(pObj) != 0)) { - free(pObj->hashList); - free(pObj); + if (threadsafe) { +#if defined(LINUX) + pHashObj->lock = calloc(1, sizeof(pthread_rwlock_t)); +#else + pHashObj->lock = calloc(1, sizeof(pthread_mutex_t)); +#endif + } + + if (__lock_init(pHashObj->lock) != 0) { + free(pHashObj->hashList); + free(pHashObj); pError("failed to init lock, reason:%s", strerror(errno)); return NULL; } - return (void *)pObj; + return pHashObj; } /** @@ -277,7 +296,7 @@ void *taosInitHashTable(uint32_t capacity, _hash_fn_t fn, bool multithreadSafe) * @param size size of block * @return SHashNode */ -static SHashNode *doCreateHashNode(const char *key, uint32_t keyLen, const char *pData, size_t dataSize, +static SHashNode *doCreateHashNode(const char *key, size_t keyLen, const char *pData, size_t dataSize, uint32_t hashVal) { size_t totalSize = dataSize + sizeof(SHashNode) + keyLen; @@ -298,7 +317,7 @@ static SHashNode *doCreateHashNode(const char *key, uint32_t keyLen, const char return pNewNode; } -static SHashNode *doUpdateHashNode(SHashNode *pNode, const char *key, uint32_t keyLen, const char *pData, +static SHashNode *doUpdateHashNode(SHashNode *pNode, const char *key, size_t keyLen, const char *pData, size_t dataSize) { size_t size = dataSize + sizeof(SHashNode) + keyLen; @@ -320,15 +339,15 @@ static SHashNode *doUpdateHashNode(SHashNode *pNode, const char *key, uint32_t k /** * insert the hash node at the front of the linked list * - * @param pObj + * @param pHashObj * @param pNode */ -static void doAddToHashTable(HashObj *pObj, SHashNode *pNode) { +static void doAddToHashTable(SHashObj *pHashObj, SHashNode *pNode) { assert(pNode != NULL); - int32_t index = HASH_INDEX(pNode->hashVal, pObj->capacity); - SHashEntry *pEntry = pObj->hashList[index]; - + int32_t index = HASH_INDEX(pNode->hashVal, 
pHashObj->capacity); + SHashEntry *pEntry = pHashObj->hashList[index]; + pNode->next = pEntry->next; if (pEntry->next) { @@ -337,76 +356,62 @@ static void doAddToHashTable(HashObj *pObj, SHashNode *pNode) { pEntry->next = pNode; pNode->prev1 = pEntry; - + pEntry->num++; - pObj->size++; + pHashObj->size++; } -int32_t taosNumElemsInHashTable(HashObj *pObj) { - if (pObj == NULL) { +size_t taosHashGetSize(const SHashObj *pHashObj) { + if (pHashObj == NULL) { return 0; } - - return pObj->size; + + return pHashObj->size; } /** * add data node into hash table - * @param pObj hash object + * @param pHashObj hash object * @param pNode hash node */ -int32_t taosAddToHashTable(HashObj *pObj, const char *key, uint32_t keyLen, void *data, uint32_t size) { - if (pObj->multithreadSafe) { - __wr_lock(&pObj->lock); - } +int32_t taosHashPut(SHashObj *pHashObj, const char *key, size_t keyLen, void *data, size_t size) { + __wr_lock(pHashObj->lock); uint32_t hashVal = 0; - SHashNode *pNode = doGetNodeFromHashTable(pObj, key, keyLen, &hashVal); + SHashNode *pNode = doGetNodeFromHashTable(pHashObj, key, keyLen, &hashVal); if (pNode == NULL) { // no data in hash table with the specified key, add it into hash table - taosHashTableResize(pObj); + taosHashTableResize(pHashObj); SHashNode *pNewNode = doCreateHashNode(key, keyLen, data, size, hashVal); if (pNewNode == NULL) { - if (pObj->multithreadSafe) { - __unlock(&pObj->lock); - } + __unlock(pHashObj->lock); return -1; } - doAddToHashTable(pObj, pNewNode); + doAddToHashTable(pHashObj, pNewNode); } else { SHashNode *pNewNode = doUpdateHashNode(pNode, key, keyLen, data, size); if (pNewNode == NULL) { - if (pObj->multithreadSafe) { - __unlock(&pObj->lock); - } - + __unlock(pHashObj->lock); return -1; } - doUpdateHashTable(pObj, pNewNode); - } - - if (pObj->multithreadSafe) { - __unlock(&pObj->lock); + doUpdateHashTable(pHashObj, pNewNode); } + __unlock(pHashObj->lock); return 0; } -char *taosGetDataFromHashTable(HashObj *pObj, const char 
*key, uint32_t keyLen) { - if (pObj->multithreadSafe) { - __rd_lock(&pObj->lock); - } +void *taosHashGet(SHashObj *pHashObj, const char *key, size_t keyLen) { + __rd_lock(pHashObj->lock); uint32_t hashVal = 0; - SHashNode *pNode = doGetNodeFromHashTable(pObj, key, keyLen, &hashVal); + SHashNode *pNode = doGetNodeFromHashTable(pHashObj, key, keyLen, &hashVal); - if (pObj->multithreadSafe) { - __unlock(&pObj->lock); - } + __unlock(pHashObj->lock); if (pNode != NULL) { assert(pNode->hashVal == hashVal); @@ -419,43 +424,39 @@ char *taosGetDataFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen) /** * remove node in hash list - * @param pObj + * @param pHashObj * @param pNode */ -void taosDeleteFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen) { - if (pObj->multithreadSafe) { - __wr_lock(&pObj->lock); - } +void taosHashRemove(SHashObj *pHashObj, const char *key, size_t keyLen) { + __wr_lock(pHashObj->lock); - uint32_t val = 0; - SHashNode *pNode = doGetNodeFromHashTable(pObj, key, keyLen, &val); + uint32_t val = 0; + SHashNode *pNode = doGetNodeFromHashTable(pHashObj, key, keyLen, &val); if (pNode == NULL) { - if (pObj->multithreadSafe) { - __unlock(&pObj->lock); - } - + __unlock(pHashObj->lock); return; } SHashNode *pNext = pNode->next; if (pNode->prev != NULL) { - int32_t slot = HASH_INDEX(val, pObj->capacity); - if (pObj->hashList[slot]->next == pNode) { - pObj->hashList[slot]->next = pNext; + int32_t slot = HASH_INDEX(val, pHashObj->capacity); + if (pHashObj->hashList[slot]->next == pNode) { + pHashObj->hashList[slot]->next = pNext; } else { pNode->prev->next = pNext; } } - + if (pNext != NULL) { pNext->prev = pNode->prev; } - uint32_t index = HASH_INDEX(pNode->hashVal, pObj->capacity); - SHashEntry *pEntry = pObj->hashList[index]; + uint32_t index = HASH_INDEX(pNode->hashVal, pHashObj->capacity); + + SHashEntry *pEntry = pHashObj->hashList[index]; pEntry->num--; - pObj->size--; + pHashObj->size--; pNode->next = NULL; pNode->prev = NULL; @@ 
-463,61 +464,160 @@ void taosDeleteFromHashTable(HashObj *pObj, const char *key, uint32_t keyLen) { pTrace("key:%s %p remove from hash table", pNode->key, pNode); tfree(pNode); - if (pObj->multithreadSafe) { - __unlock(&pObj->lock); - } + __unlock(pHashObj->lock); } -void taosCleanUpHashTable(void *handle) { - HashObj *pObj = (HashObj *)handle; - if (pObj == NULL || pObj->capacity <= 0) return; +void taosHashCleanup(SHashObj *pHashObj) { + if (pHashObj == NULL || pHashObj->capacity <= 0) { + return; + } SHashNode *pNode, *pNext; - if (pObj->multithreadSafe) { - __wr_lock(&pObj->lock); - } + __wr_lock(pHashObj->lock); - if (pObj->hashList) { - for (int32_t i = 0; i < pObj->capacity; ++i) { - SHashEntry *pEntry = pObj->hashList[i]; + if (pHashObj->hashList) { + for (int32_t i = 0; i < pHashObj->capacity; ++i) { + SHashEntry *pEntry = pHashObj->hashList[i]; pNode = pEntry->next; while (pNode) { pNext = pNode->next; + if (pHashObj->freeFp) { + pHashObj->freeFp(pNode->data); + } + free(pNode); pNode = pNext; } - + tfree(pEntry); } - free(pObj->hashList); + free(pHashObj->hashList); + } + + __unlock(pHashObj->lock); + __lock_destroy(pHashObj->lock); + + tfree(pHashObj->lock); + memset(pHashObj, 0, sizeof(SHashObj)); + free(pHashObj); +} + +void taosHashSetFreecb(SHashObj *pHashObj, _hash_free_fn_t freeFp) { + if (pHashObj == NULL || freeFp == NULL) { + return; + } + + pHashObj->freeFp = freeFp; +} + +SHashMutableIterator *taosHashCreateIter(SHashObj *pHashObj) { + SHashMutableIterator *pIter = calloc(1, sizeof(SHashMutableIterator)); + if (pIter == NULL) { + return NULL; } - if (pObj->multithreadSafe) { - __unlock(&pObj->lock); - __lock_destroy(&pObj->lock); + pIter->pHashObj = pHashObj; + return pIter; +} + +static SHashNode *getNextHashNode(SHashMutableIterator *pIter) { + assert(pIter != NULL); + + while (pIter->entryIndex < pIter->pHashObj->capacity) { + SHashEntry *pEntry = pIter->pHashObj->hashList[pIter->entryIndex]; + if (pEntry->next == NULL) { + 
pIter->entryIndex++; + continue; + } + + return pEntry->next; } - memset(pObj, 0, sizeof(HashObj)); - free(pObj); + return NULL; +} + +bool taosHashIterNext(SHashMutableIterator *pIter) { + if (pIter == NULL) { + return false; + } + + size_t size = taosHashGetSize(pIter->pHashObj); + if (size == 0 || pIter->num >= size) { + return false; + } + + // check the first one + if (pIter->num == 0) { + assert(pIter->pCur == NULL && pIter->pNext == NULL); + + while (1) { + SHashEntry *pEntry = pIter->pHashObj->hashList[pIter->entryIndex]; + if (pEntry->next == NULL) { + pIter->entryIndex++; + continue; + } + + pIter->pCur = pEntry->next; + + if (pIter->pCur->next) { + pIter->pNext = pIter->pCur->next; + } else { + pIter->pNext = getNextHashNode(pIter); + } + + break; + } + + pIter->num++; + return true; + } else { + assert(pIter->pCur != NULL); + if (pIter->pNext) { + pIter->pCur = pIter->pNext; + } else { // no more data in the hash list + return false; + } + + pIter->num++; + + if (pIter->pCur->next) { + pIter->pNext = pIter->pCur->next; + } else { + pIter->pNext = getNextHashNode(pIter); + } + + return true; + } +} + +void *taosHashIterGet(SHashMutableIterator *iter) { return (iter == NULL) ? 
NULL : iter->pCur->data; } + +void *taosHashDestroyIter(SHashMutableIterator *iter) { + if (iter == NULL) { + return NULL; + } + + free(iter); + return NULL; } // for profile only -int32_t taosGetHashMaxOverflowLength(HashObj* pObj) { - if (pObj == NULL || pObj->size == 0) { +int32_t taosHashGetMaxOverflowLinkLength(const SHashObj *pHashObj) { + if (pHashObj == NULL || pHashObj->size == 0) { return 0; } - + int32_t num = 0; - - for(int32_t i = 0; i < pObj->size; ++i) { - SHashEntry *pEntry = pObj->hashList[i]; + + for (int32_t i = 0; i < pHashObj->size; ++i) { + SHashEntry *pEntry = pHashObj->hashList[i]; if (num < pEntry->num) { num = pEntry->num; } } - + return num; } diff --git a/src/util/src/ihash.c b/src/util/src/ihash.c index 8c492b03f867036d3fcb3a52f872f57142cba7ec..6b58d8ef31c3b83c8c0160364abf2ae97132d0f0 100644 --- a/src/util/src/ihash.c +++ b/src/util/src/ihash.c @@ -26,7 +26,7 @@ typedef struct { IHashNode **hashList; int32_t maxSessions; int32_t dataSize; - int32_t (*hashFp)(void *, uint64_t key); + int32_t (*hashFp)(void *, uint64_t key); pthread_mutex_t mutex; } IHashObj; @@ -186,3 +186,84 @@ void taosCleanUpIntHash(void *handle) { memset(pObj, 0, sizeof(IHashObj)); free(pObj); } + +void taosCleanUpIntHashWithFp(void *handle, void (*fp)(char *)) { + IHashObj * pObj; + IHashNode *pNode, *pNext; + + pObj = (IHashObj *)handle; + if (pObj == NULL || pObj->maxSessions <= 0) return; + + pthread_mutex_lock(&pObj->mutex); + + if (pObj->hashList) { + for (int i = 0; i < pObj->maxSessions; ++i) { + pNode = pObj->hashList[i]; + while (pNode) { + pNext = pNode->next; + if (fp != NULL) (*fp)(pNode->data); + free(pNode); + pNode = pNext; + } + } + + free(pObj->hashList); + } + + pthread_mutex_unlock(&pObj->mutex); + + pthread_mutex_destroy(&pObj->mutex); + + memset(pObj, 0, sizeof(IHashObj)); + free(pObj); +} + +void taosVisitIntHashWithFp(void *handle, int (*fp)(char *, void *), void *param) { + IHashObj * pObj; + IHashNode *pNode, *pNext; + char * pData = NULL; + 
+ pObj = (IHashObj *)handle; + if (pObj == NULL || pObj->maxSessions <= 0) return NULL; + + pthread_mutex_lock(&pObj->mutex); + + if (pObj->hashList) { + for (int i = 0; i < pObj->maxSessions; ++i) { + pNode = pObj->hashList[i]; + while (pNode) { + pNext = pNode->next; + (*fp)(pNode->data, param); + pNode = pNext; + } + } + } + + pthread_mutex_unlock(&pObj->mutex); +} + +int32_t taosGetIntHashSize(void *handle) { + IHashObj * pObj; + IHashNode *pNode, *pNext; + char * pData = NULL; + int32_t num = 0; + + pObj = (IHashObj *)handle; + if (pObj == NULL || pObj->maxSessions <= 0) return NULL; + + pthread_mutex_lock(&pObj->mutex); + + if (pObj->hashList) { + for (int i = 0; i < pObj->maxSessions; ++i) { + pNode = pObj->hashList[i]; + while (pNode) { + pNext = pNode->next; + num++; + pNode = pNext; + } + } + } + + pthread_mutex_unlock(&pObj->mutex); + return num; +} \ No newline at end of file diff --git a/src/util/src/shash.c b/src/util/src/shash.c index 5be0dfa9739157ab231bdad7c52d7e15416736dd..da97af84bbc957ba102add1b34bff23d71c91d0e 100644 --- a/src/util/src/shash.c +++ b/src/util/src/shash.c @@ -33,7 +33,7 @@ typedef struct { SHashNode **hashList; uint32_t maxSessions; uint32_t dataSize; - uint32_t (*hashFp)(void *, char *string); + uint32_t (*hashFp)(void *, char *string); pthread_mutex_t mutex; } SHashObj; diff --git a/src/util/src/sskiplist.c b/src/util/src/sskiplist.c deleted file mode 100644 index cba38e9bed5242c28a47fb13e00907299d46c184..0000000000000000000000000000000000000000 --- a/src/util/src/sskiplist.c +++ /dev/null @@ -1,848 +0,0 @@ -/* - * Copyright (c) 2019 TAOS Data, Inc. - * - * This program is free software: you can use, redistribute, and/or modify - * it under the terms of the GNU Affero General Public License, version 3 - * or later ("AGPL"), as published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -#if 0 -#include "os.h" - -#include "tlog.h" -#include "taosdef.h" -#include "sskiplist.h" -#include "tutil.h" - -static FORCE_INLINE void recordNodeEachLevel(tSkipList *pSkipList, int32_t nLevel) { // record link count in each level - for (int32_t i = 0; i < nLevel; ++i) { - pSkipList->state.nLevelNodeCnt[i]++; - } -} - -static FORCE_INLINE void removeNodeEachLevel(tSkipList *pSkipList, int32_t nLevel) { - for (int32_t i = 0; i < nLevel; ++i) { - pSkipList->state.nLevelNodeCnt[i]--; - } -} - -static FORCE_INLINE int32_t getSkipListNodeRandomHeight(tSkipList *pSkipList) { - const uint32_t factor = 4; - - int32_t n = 1; - while ((rand() % factor) == 0 && n <= pSkipList->nMaxLevel) { - n++; - } - - return n; -} - -static FORCE_INLINE int32_t getSkipListNodeLevel(tSkipList *pSkipList) { - int32_t nLevel = getSkipListNodeRandomHeight(pSkipList); - if (pSkipList->nSize == 0) { - nLevel = 1; - pSkipList->nLevel = 1; - } else { - if (nLevel > pSkipList->nLevel && pSkipList->nLevel < pSkipList->nMaxLevel) { - nLevel = (++pSkipList->nLevel); - } - } - return nLevel; -} - -void tSkipListDoInsert(tSkipList *pSkipList, tSkipListNode **forward, int32_t nLevel, tSkipListNode *pNode); - -void SSkipListDoRecordPut(tSkipList *pSkipList) { - const int32_t MAX_RECORD_NUM = 1000; - - if (pSkipList->state.nInsertObjs == MAX_RECORD_NUM) { - pSkipList->state.nInsertObjs = 1; - pSkipList->state.nTotalStepsForInsert = 0; - pSkipList->state.nTotalElapsedTimeForInsert = 0; - } else { - pSkipList->state.nInsertObjs++; - } -} - -int32_t compareIntVal(const void *pLeft, const void *pRight) { - int64_t lhs = ((tSkipListKey *)pLeft)->i64Key; - int64_t rhs = ((tSkipListKey 
*)pRight)->i64Key; - - DEFAULT_COMP(lhs, rhs); -} - -int32_t scompareIntDoubleVal(const void *pLeft, const void *pRight) { - int64_t lhs = ((tSkipListKey *)pLeft)->i64Key; - double rhs = ((tSkipListKey *)pRight)->dKey; - if (fabs(lhs - rhs) < FLT_EPSILON) { - return 0; - } else { - return (lhs > rhs) ? 1 : -1; - } -} - -int32_t scompareDoubleIntVal(const void *pLeft, const void *pRight) { - double lhs = ((tSkipListKey *)pLeft)->dKey; - int64_t rhs = ((tSkipListKey *)pRight)->i64Key; - if (fabs(lhs - rhs) < FLT_EPSILON) { - return 0; - } else { - return (lhs > rhs) ? 1 : -1; - } -} - -int32_t scompareDoubleVal(const void *pLeft, const void *pRight) { - double ret = (((tSkipListKey *)pLeft)->dKey - ((tSkipListKey *)pRight)->dKey); - if (fabs(ret) < FLT_EPSILON) { - return 0; - } else { - return ret > 0 ? 1 : -1; - } -} - -int32_t scompareStrVal(const void *pLeft, const void *pRight) { - tSkipListKey *pL = (tSkipListKey *)pLeft; - tSkipListKey *pR = (tSkipListKey *)pRight; - - if (pL->nLen == 0 && pR->nLen == 0) { - return 0; - } - - //handle only one-side bound compare situation, there is only lower bound or only upper bound - if (pL->nLen == -1) { - return 1; // no lower bound, lower bound is minimum, always return -1; - } else if (pR->nLen == -1) { - return -1; // no upper bound, upper bound is maximum situation, always return 1; - } - - int32_t ret = strcmp(((tSkipListKey *)pLeft)->pz, ((tSkipListKey *)pRight)->pz); - - if (ret == 0) { - return 0; - } else { - return ret > 0 ? 
1 : -1; - } -} - -int32_t scompareWStrVal(const void *pLeft, const void *pRight) { - tSkipListKey *pL = (tSkipListKey *)pLeft; - tSkipListKey *pR = (tSkipListKey *)pRight; - - if (pL->nLen == 0 && pR->nLen == 0) { - return 0; - } - - //handle only one-side bound compare situation, there is only lower bound or only upper bound - if (pL->nLen == -1) { - return 1; // no lower bound, lower bound is minimum, always return -1; - } else if (pR->nLen == -1) { - return -1; // no upper bound, upper bound is maximum situation, always return 1; - } - - int32_t ret = wcscmp(((tSkipListKey *)pLeft)->wpz, ((tSkipListKey *)pRight)->wpz); - - if (ret == 0) { - return 0; - } else { - return ret > 0 ? 1 : -1; - } -} - -static __compar_fn_t getKeyFilterComparator(tSkipList *pSkipList, int32_t filterDataType) { - __compar_fn_t comparator = NULL; - - switch (pSkipList->keyType) { - case TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_BOOL: { - if (filterDataType >= TSDB_DATA_TYPE_BOOL && filterDataType <= TSDB_DATA_TYPE_BIGINT) { - comparator = compareIntVal; - } else if (filterDataType >= TSDB_DATA_TYPE_FLOAT && filterDataType <= TSDB_DATA_TYPE_DOUBLE) { - comparator = scompareIntDoubleVal; - } - break; - } - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_DOUBLE: { - if (filterDataType >= TSDB_DATA_TYPE_BOOL && filterDataType <= TSDB_DATA_TYPE_BIGINT) { - comparator = scompareDoubleIntVal; - } else if (filterDataType >= TSDB_DATA_TYPE_FLOAT && filterDataType <= TSDB_DATA_TYPE_DOUBLE) { - comparator = scompareDoubleVal; - } - break; - } - case TSDB_DATA_TYPE_BINARY: - comparator = scompareStrVal; - break; - case TSDB_DATA_TYPE_NCHAR: - comparator = scompareWStrVal; - break; - default: - comparator = compareIntVal; - break; - } - - return comparator; -} - -static __compar_fn_t getKeyComparator(int32_t keyType) { - __compar_fn_t comparator = NULL; - - switch (keyType) { - case TSDB_DATA_TYPE_TINYINT: 
- case TSDB_DATA_TYPE_SMALLINT: - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_BIGINT: - case TSDB_DATA_TYPE_BOOL: - comparator = compareIntVal; - break; - - case TSDB_DATA_TYPE_FLOAT: - case TSDB_DATA_TYPE_DOUBLE: - comparator = scompareDoubleVal; - break; - - case TSDB_DATA_TYPE_BINARY: - comparator = scompareStrVal; - break; - - case TSDB_DATA_TYPE_NCHAR: - comparator = scompareWStrVal; - break; - - default: - comparator = compareIntVal; - break; - } - - return comparator; -} - -tSkipList* SSkipListCreate(int16_t nMaxLevel, int16_t keyType, int16_t nMaxKeyLen) { - tSkipList *pSkipList = (tSkipList *)calloc(1, sizeof(tSkipList)); - if (pSkipList == NULL) { - return NULL; - } - - pSkipList->keyType = keyType; - - pSkipList->comparator = getKeyComparator(keyType); - pSkipList->pHead.pForward = (tSkipListNode **)calloc(1, POINTER_BYTES * MAX_SKIP_LIST_LEVEL); - - pSkipList->nMaxLevel = MAX_SKIP_LIST_LEVEL; - pSkipList->nLevel = 1; - - pSkipList->nMaxKeyLen = nMaxKeyLen; - pSkipList->nMaxLevel = nMaxLevel; - - if (pthread_rwlock_init(&pSkipList->lock, NULL) != 0) { - tfree(pSkipList->pHead.pForward); - tfree(pSkipList); - return NULL; - } - - srand(time(NULL)); - pSkipList->state.nTotalMemSize += sizeof(tSkipList); - return pSkipList; -} - -static void doRemove(tSkipList *pSkipList, tSkipListNode *pNode, tSkipListNode *forward[]) { - int32_t level = pNode->nLevel; - for (int32_t j = level - 1; j >= 0; --j) { - if ((forward[j]->pForward[j] != NULL) && (forward[j]->pForward[j]->pForward[j])) { - forward[j]->pForward[j]->pForward[j]->pBackward[j] = forward[j]; - } - - if (forward[j]->pForward[j] != NULL) { - forward[j]->pForward[j] = forward[j]->pForward[j]->pForward[j]; - } - } - - pSkipList->state.nTotalMemSize -= (sizeof(tSkipListNode) + POINTER_BYTES * pNode->nLevel * 2); - removeNodeEachLevel(pSkipList, pNode->nLevel); - - tfree(pNode); - --pSkipList->nSize; -} - -static size_t getOneNodeSize(const tSkipListKey *pKey, int32_t nLevel) { - size_t size = 
sizeof(tSkipListNode) + sizeof(intptr_t) * (nLevel << 1); - if (pKey->nType == TSDB_DATA_TYPE_BINARY) { - size += pKey->nLen + 1; - } else if (pKey->nType == TSDB_DATA_TYPE_NCHAR) { - size += (pKey->nLen + 1) * TSDB_NCHAR_SIZE; - } - - return size; -} - -static tSkipListNode *SSkipListCreateNode(void *pData, const tSkipListKey *pKey, int32_t nLevel) { - size_t nodeSize = getOneNodeSize(pKey, nLevel); - tSkipListNode *pNode = (tSkipListNode *)calloc(1, nodeSize); - - pNode->pForward = (tSkipListNode **)(&pNode[1]); - pNode->pBackward = (pNode->pForward + nLevel); - - pNode->pData = pData; - - pNode->key = *pKey; - if (pKey->nType == TSDB_DATA_TYPE_BINARY) { - pNode->key.pz = (char *)(pNode->pBackward + nLevel); - - strcpy(pNode->key.pz, pKey->pz); - pNode->key.pz[pKey->nLen] = 0; - } else if (pKey->nType == TSDB_DATA_TYPE_NCHAR) { - pNode->key.wpz = (wchar_t *)(pNode->pBackward + nLevel); - wcsncpy(pNode->key.wpz, pKey->wpz, pKey->nLen); - pNode->key.wpz[pKey->nLen] = 0; - } - - pNode->nLevel = nLevel; - return pNode; -} - -tSkipListKey SSkipListCreateKey(int32_t type, char *val, size_t keyLength) { - tSkipListKey k = {0}; - tVariantCreateFromBinary(&k, val, (uint32_t) keyLength, (uint32_t) type); - return k; -} - -void tSkipListDestroyKey(tSkipListKey *pKey) { tVariantDestroy(pKey); } - -void* SSkipListDestroy(tSkipList *pSkipList) { - if (pSkipList == NULL) { - return NULL; - } - - pthread_rwlock_wrlock(&pSkipList->lock); - tSkipListNode *pNode = pSkipList->pHead.pForward[0]; - while (pNode) { - tSkipListNode *pTemp = pNode; - pNode = pNode->pForward[0]; - tfree(pTemp); - } - - tfree(pSkipList->pHead.pForward); - pthread_rwlock_unlock(&pSkipList->lock); - - pthread_rwlock_destroy(&pSkipList->lock); - tfree(pSkipList); - - return NULL; -} - -tSkipListNode *SSkipListPut(tSkipList *pSkipList, void *pData, tSkipListKey *pKey, int32_t insertIdenticalKey) { - if (pSkipList == NULL) { - return NULL; - } - - pthread_rwlock_wrlock(&pSkipList->lock); - - // record one node 
is put into skiplist - SSkipListDoRecordPut(pSkipList); - - tSkipListNode *px = &pSkipList->pHead; - - tSkipListNode *forward[MAX_SKIP_LIST_LEVEL] = {0}; - for (int32_t i = pSkipList->nLevel - 1; i >= 0; --i) { - while (px->pForward[i] != NULL && (pSkipList->comparator(&px->pForward[i]->key, pKey) < 0)) { - px = px->pForward[i]; - } - - pSkipList->state.nTotalStepsForInsert++; - forward[i] = px; - } - - // if the skiplist does not allowed identical key inserted, the new data will be discarded. - if ((insertIdenticalKey == 0) && forward[0] != &pSkipList->pHead && - (pSkipList->comparator(&forward[0]->key, pKey) == 0)) { - pthread_rwlock_unlock(&pSkipList->lock); - return forward[0]; - } - - int32_t nLevel = getSkipListNodeLevel(pSkipList); - recordNodeEachLevel(pSkipList, nLevel); - - tSkipListNode *pNode = SSkipListCreateNode(pData, pKey, nLevel); - tSkipListDoInsert(pSkipList, forward, nLevel, pNode); - - pSkipList->nSize += 1; - - // char tmpstr[512] = {0}; - // tVariantToString(&pNode->key, tmpstr); - // pTrace("skiplist:%p, node added, key:%s, total list len:%d", pSkipList, - // tmpstr, pSkipList->nSize); - - pSkipList->state.nTotalMemSize += getOneNodeSize(pKey, nLevel); - pthread_rwlock_unlock(&pSkipList->lock); - - return pNode; -} - -void tSkipListDoInsert(tSkipList *pSkipList, tSkipListNode **forward, int32_t nLevel, tSkipListNode *pNode) { - for (int32_t i = 0; i < nLevel; ++i) { - tSkipListNode *x = forward[i]; - if (x != NULL) { - pNode->pBackward[i] = x; - if (x->pForward[i]) x->pForward[i]->pBackward[i] = pNode; - - pNode->pForward[i] = x->pForward[i]; - x->pForward[i] = pNode; - } else { - pSkipList->pHead.pForward[i] = pNode; - pNode->pBackward[i] = &(pSkipList->pHead); - } - } -} - -tSkipListNode *tSkipListGetOne(tSkipList *pSkipList, tSkipListKey *pKey) { - int32_t sLevel = pSkipList->nLevel - 1; - int32_t ret = -1; - - tSkipListNode *x = &pSkipList->pHead; - - pthread_rwlock_rdlock(&pSkipList->lock); - pSkipList->state.queryCount++; - - 
__compar_fn_t filterComparator = getKeyFilterComparator(pSkipList, pKey->nType); - - for (int32_t i = sLevel; i >= 0; --i) { - while (x->pForward[i] != NULL && (ret = filterComparator(&x->pForward[i]->key, pKey)) < 0) { - x = x->pForward[i]; - } - - if (ret == 0) { - pthread_rwlock_unlock(&pSkipList->lock); - return x->pForward[i]; - } - } - - pthread_rwlock_unlock(&pSkipList->lock); - return NULL; -} - -static int32_t tSkipListEndParQuery(tSkipList *pSkipList, tSkipListNode *pStartNode, tSkipListKey *pEndKey, - int32_t cond, tSkipListNode ***pRes) { - pthread_rwlock_rdlock(&pSkipList->lock); - tSkipListNode *p = pStartNode; - int32_t numOfRes = 0; - - __compar_fn_t filterComparator = getKeyFilterComparator(pSkipList, pEndKey->nType); - while (p != NULL) { - int32_t ret = filterComparator(&p->key, pEndKey); - if (ret > 0) { - break; - } - - if (ret < 0) { - numOfRes++; - p = p->pForward[0]; - } else if (ret == 0) { - if (cond == TSDB_RELATION_LESS_EQUAL) { - numOfRes++; - p = p->pForward[0]; - } else { - break; - } - } - } - - (*pRes) = (tSkipListNode **)malloc(POINTER_BYTES * numOfRes); - for (int32_t i = 0; i < numOfRes; ++i) { - (*pRes)[i] = pStartNode; - pStartNode = pStartNode->pForward[0]; - } - pthread_rwlock_unlock(&pSkipList->lock); - - return numOfRes; -} - -/* - * maybe return the copy of tSkipListNode would be better - */ -int32_t tSkipListGets(tSkipList *pSkipList, tSkipListKey *pKey, tSkipListNode ***pRes) { - (*pRes) = NULL; - - tSkipListNode *pNode = tSkipListGetOne(pSkipList, pKey); - if (pNode == NULL) { - return 0; - } - - __compar_fn_t filterComparator = getKeyFilterComparator(pSkipList, pKey->nType); - - // backward check if previous nodes are with the same value. 
- tSkipListNode *pPrev = pNode->pBackward[0]; - while ((pPrev != &pSkipList->pHead) && filterComparator(&pPrev->key, pKey) == 0) { - pPrev = pPrev->pBackward[0]; - } - - return tSkipListEndParQuery(pSkipList, pPrev->pForward[0], &pNode->key, TSDB_RELATION_LESS_EQUAL, pRes); -} - -static tSkipListNode *tSkipListParQuery(tSkipList *pSkipList, tSkipListKey *pKey, int32_t cond) { - int32_t sLevel = pSkipList->nLevel - 1; - int32_t ret = -1; - - tSkipListNode *x = &pSkipList->pHead; - __compar_fn_t filterComparator = getKeyFilterComparator(pSkipList, pKey->nType); - - pthread_rwlock_rdlock(&pSkipList->lock); - - if (cond == TSDB_RELATION_LARGE_EQUAL || cond == TSDB_RELATION_LARGE) { - for (int32_t i = sLevel; i >= 0; --i) { - while (x->pForward[i] != NULL && (ret = filterComparator(&x->pForward[i]->key, pKey)) < 0) { - x = x->pForward[i]; - } - } - - // backward check if previous nodes are with the same value. - if (cond == TSDB_RELATION_LARGE_EQUAL && ret == 0) { - tSkipListNode *pNode = x->pForward[0]; - while ((pNode->pBackward[0] != &pSkipList->pHead) && (filterComparator(&pNode->pBackward[0]->key, pKey) == 0)) { - pNode = pNode->pBackward[0]; - } - pthread_rwlock_unlock(&pSkipList->lock); - return pNode; - } - - if (ret > 0 || cond == TSDB_RELATION_LARGE_EQUAL) { - pthread_rwlock_unlock(&pSkipList->lock); - return x->pForward[0]; - } else { // cond == TSDB_RELATION_LARGE && ret == 0 - tSkipListNode *pn = x->pForward[0]; - while (pn != NULL && filterComparator(&pn->key, pKey) == 0) { - pn = pn->pForward[0]; - } - pthread_rwlock_unlock(&pSkipList->lock); - return pn; - } - } - - pthread_rwlock_unlock(&pSkipList->lock); - return NULL; -} - -int32_t tSkipListIterateList(tSkipList *pSkipList, tSkipListNode ***pRes, bool (*fp)(tSkipListNode *, void *), - void *param) { - (*pRes) = (tSkipListNode **)calloc(1, POINTER_BYTES * pSkipList->nSize); - if (NULL == *pRes) { - pError("error skiplist %p, malloc failed", pSkipList); - return -1; - } - - 
pthread_rwlock_rdlock(&pSkipList->lock); - tSkipListNode *pStartNode = pSkipList->pHead.pForward[0]; - int32_t num = 0; - - for (int32_t i = 0; i < pSkipList->nSize; ++i) { - if (pStartNode == NULL) { - pError("error skiplist %p, required length:%d, actual length:%d", pSkipList, pSkipList->nSize, i - 1); -#ifdef _DEBUG_VIEW - SSkipListPrint(pSkipList, 1); -#endif - break; - } - - if (fp == NULL || (fp != NULL && fp(pStartNode, param) == true)) { - (*pRes)[num++] = pStartNode; - } - - pStartNode = pStartNode->pForward[0]; - } - - pthread_rwlock_unlock(&pSkipList->lock); - - if (num == 0) { - free(*pRes); - *pRes = NULL; - } else if (num < pSkipList->nSize) { // free unused memory - char* tmp = realloc((*pRes), num * POINTER_BYTES); - assert(tmp != NULL); - - *pRes = (tSkipListNode**)tmp; - } - - return num; -} - -int32_t tSkipListIteratorReset(tSkipList *pSkipList, SSkipListIterator* iter) { - if (pSkipList == NULL) { - return -1; - } - - iter->pSkipList = pSkipList; - - pthread_rwlock_rdlock(&pSkipList->lock); - iter->cur = NULL;//pSkipList->pHead.pForward[0]; - iter->num = pSkipList->nSize; - pthread_rwlock_unlock(&pSkipList->lock); - - return 0; -} - -bool tSkipListIteratorNext(SSkipListIterator* iter) { - if (iter->num == 0 || iter->pSkipList == NULL) { - return false; - } - - tSkipList* pSkipList = iter->pSkipList; - - pthread_rwlock_rdlock(&pSkipList->lock); - if (iter->cur == NULL) { - iter->cur = pSkipList->pHead.pForward[0]; - } else { - iter->cur = iter->cur->pForward[0]; - } - - pthread_rwlock_unlock(&pSkipList->lock); - - return iter->cur != NULL; -} - -tSkipListNode* tSkipListIteratorGet(SSkipListIterator* iter) { - return iter->cur; -} - -int32_t tSkipListRangeQuery(tSkipList *pSkipList, tSKipListQueryCond *pCond, tSkipListNode ***pRes) { - pSkipList->state.queryCount++; - tSkipListNode *pStart = tSkipListParQuery(pSkipList, &pCond->lowerBnd, pCond->lowerBndRelOptr); - if (pStart == 0) { - *pRes = NULL; - return 0; - } - - return 
tSkipListEndParQuery(pSkipList, pStart, &pCond->upperBnd, pCond->upperBndRelOptr, pRes); -} - -static bool removeSupport(tSkipList *pSkipList, tSkipListNode **forward, tSkipListKey *pKey) { - __compar_fn_t filterComparator = getKeyFilterComparator(pSkipList, pKey->nType); - - if (filterComparator(&forward[0]->pForward[0]->key, pKey) == 0) { - tSkipListNode *p = forward[0]->pForward[0]; - doRemove(pSkipList, p, forward); - } else { // failed to find the node of specified value,abort - return false; - } - - // compress the minimum level of skip list - while (pSkipList->nLevel > 0 && pSkipList->pHead.pForward[pSkipList->nLevel - 1] == NULL) { - pSkipList->nLevel -= 1; - } - - return true; -} - -void tSkipListRemoveNode(tSkipList *pSkipList, tSkipListNode *pNode) { - tSkipListNode *forward[MAX_SKIP_LIST_LEVEL] = {0}; - - pthread_rwlock_rdlock(&pSkipList->lock); - for (int32_t i = 0; i < pNode->nLevel; ++i) { - forward[i] = pNode->pBackward[i]; - } - - removeSupport(pSkipList, forward, &pNode->key); - pthread_rwlock_unlock(&pSkipList->lock); -} - -bool tSkipListRemove(tSkipList *pSkipList, tSkipListKey *pKey) { - tSkipListNode *forward[MAX_SKIP_LIST_LEVEL] = {0}; - __compar_fn_t filterComparator = getKeyFilterComparator(pSkipList, pKey->nType); - - pthread_rwlock_rdlock(&pSkipList->lock); - - tSkipListNode *x = &pSkipList->pHead; - for (int32_t i = pSkipList->nLevel - 1; i >= 0; --i) { - while (x->pForward[i] != NULL && (filterComparator(&x->pForward[i]->key, pKey) < 0)) { - x = x->pForward[i]; - } - forward[i] = x; - } - - bool ret = removeSupport(pSkipList, forward, pKey); - pthread_rwlock_unlock(&pSkipList->lock); - - return ret; -} - -void SSkipListPrint(tSkipList *pSkipList, int16_t nlevel) { - if (pSkipList == NULL || pSkipList->nLevel < nlevel || nlevel <= 0) { - return; - } - - tSkipListNode *p = pSkipList->pHead.pForward[nlevel - 1]; - int32_t id = 1; - while (p) { - switch (pSkipList->keyType) { - case TSDB_DATA_TYPE_INT: - case TSDB_DATA_TYPE_SMALLINT: - case 
TSDB_DATA_TYPE_TINYINT: - case TSDB_DATA_TYPE_BIGINT: - fprintf(stdout, "%d: %" PRId64 " \n", id++, p->key.i64Key); - break; - case TSDB_DATA_TYPE_BINARY: - fprintf(stdout, "%d: %s \n", id++, p->key.pz); - break; - case TSDB_DATA_TYPE_DOUBLE: - fprintf(stdout, "%d: %lf \n", id++, p->key.dKey); - break; - default: - fprintf(stdout, "\n"); - } - p = p->pForward[nlevel - 1]; - } -} - -/* - * query processor based on query condition - */ -int32_t tSkipListQuery(tSkipList *pSkipList, tSKipListQueryCond *pQueryCond, tSkipListNode ***pResult) { - // query condition check - int32_t rel = 0; - __compar_fn_t comparator = getKeyComparator(pQueryCond->lowerBnd.nType); - - if (pSkipList == NULL || pQueryCond == NULL || pSkipList->nSize == 0 || - (((rel = comparator(&pQueryCond->lowerBnd, &pQueryCond->upperBnd)) > 0 && - pQueryCond->lowerBnd.nType != TSDB_DATA_TYPE_NCHAR && pQueryCond->lowerBnd.nType != TSDB_DATA_TYPE_BINARY))) { - (*pResult) = NULL; - return 0; - } - - if (rel == 0) { - /* - * 0 means: pQueryCond->lowerBnd == pQueryCond->upperBnd - * point query - */ - if (pQueryCond->lowerBndRelOptr == TSDB_RELATION_LARGE_EQUAL && - pQueryCond->upperBndRelOptr == TSDB_RELATION_LESS_EQUAL) { // point query - return tSkipListGets(pSkipList, &pQueryCond->lowerBnd, pResult); - } else { - (*pResult) = NULL; - return 0; - } - } else { - /* range query, query operation code check */ - return tSkipListRangeQuery(pSkipList, pQueryCond, pResult); - } -} - -typedef struct MultipleQueryResult { - int32_t len; - tSkipListNode **pData; -} MultipleQueryResult; - -static int32_t mergeQueryResult(MultipleQueryResult *pResults, int32_t numOfResSet, tSkipListNode ***pRes) { - int32_t total = 0; - for (int32_t i = 0; i < numOfResSet; ++i) { - total += pResults[i].len; - } - - (*pRes) = malloc(POINTER_BYTES * total); - int32_t idx = 0; - - for (int32_t i = 0; i < numOfResSet; ++i) { - MultipleQueryResult *pOneResult = &pResults[i]; - for (int32_t j = 0; j < pOneResult->len; ++j) { - (*pRes)[idx++] 
= pOneResult->pData[j]; - } - } - - return total; -} - -static void removeDuplicateKey(tSkipListKey *pKey, int32_t *numOfKey, __compar_fn_t comparator) { - if (*numOfKey == 1) { - return; - } - - qsort(pKey, *numOfKey, sizeof(pKey[0]), comparator); - int32_t i = 0, j = 1; - - while (i < (*numOfKey) && j < (*numOfKey)) { - int32_t ret = comparator(&pKey[i], &pKey[j]); - if (ret == 0) { - j++; - } else { - pKey[i + 1] = pKey[j]; - i++; - j++; - } - } - - (*numOfKey) = i + 1; -} - -int32_t mergeResult(const tSkipListKey *pKey, int32_t numOfKey, tSkipListNode ***pRes, __compar_fn_t comparator, - tSkipListNode *pNode) { - int32_t i = 0, j = 0; - // merge two sorted arrays in O(n) time - while (i < numOfKey && pNode != NULL) { - int32_t ret = comparator(&pNode->key, &pKey[i]); - if (ret < 0) { - (*pRes)[j++] = pNode; - pNode = pNode->pForward[0]; - } else if (ret == 0) { - pNode = pNode->pForward[0]; - } else { // pNode->key > pkey[i] - i++; - } - } - - while (pNode != NULL) { - (*pRes)[j++] = pNode; - pNode = pNode->pForward[0]; - } - return j; -} - -int32_t tSkipListPointQuery(tSkipList *pSkipList, tSkipListKey *pKey, int32_t numOfKey, tSkipListPointQueryType type, - tSkipListNode ***pRes) { - if (numOfKey == 0 || pKey == NULL || pSkipList == NULL || pSkipList->nSize == 0 || - (type != INCLUDE_POINT_QUERY && type != EXCLUDE_POINT_QUERY)) { - (*pRes) = NULL; - return 0; - } - - __compar_fn_t comparator = getKeyComparator(pKey->nType); - removeDuplicateKey(pKey, &numOfKey, comparator); - - if (type == INCLUDE_POINT_QUERY) { - if (numOfKey == 1) { - return tSkipListGets(pSkipList, &pKey[0], pRes); - } else { - MultipleQueryResult *pTempResult = (MultipleQueryResult *)malloc(sizeof(MultipleQueryResult) * numOfKey); - for (int32_t i = 0; i < numOfKey; ++i) { - pTempResult[i].len = tSkipListGets(pSkipList, &pKey[i], &pTempResult[i].pData); - } - int32_t num = mergeQueryResult(pTempResult, numOfKey, pRes); - - for (int32_t i = 0; i < numOfKey; ++i) { - 
free(pTempResult[i].pData); - } - free(pTempResult); - return num; - } - } else { // exclude query - *pRes = malloc(POINTER_BYTES * pSkipList->nSize); - - __compar_fn_t filterComparator = getKeyFilterComparator(pSkipList, pKey->nType); - - tSkipListNode *pNode = pSkipList->pHead.pForward[0]; - int32_t retLen = mergeResult(pKey, numOfKey, pRes, filterComparator, pNode); - - if (retLen < pSkipList->nSize) { - (*pRes) = realloc(*pRes, POINTER_BYTES * retLen); - } - return retLen; - } -} - -#endif \ No newline at end of file diff --git a/src/util/src/tarray.c b/src/util/src/tarray.c index 4ed2da0567be79ac019e2dfeea4a045601a1c430..fb2dac827e13d4526b6b7a6c358a34dd575eb941 100755 --- a/src/util/src/tarray.c +++ b/src/util/src/tarray.c @@ -98,7 +98,7 @@ void* taosArrayGetP(SArray* pArray, size_t index) { return *(void**)ret; } -size_t taosArrayGetSize(SArray* pArray) { return pArray->size; } +size_t taosArrayGetSize(const SArray* pArray) { return pArray->size; } void* taosArrayInsert(SArray* pArray, size_t index, void* pData) { if (pArray == NULL || pData == NULL) { diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c new file mode 100644 index 0000000000000000000000000000000000000000..c8c3879f0a34c5236a7b691bbc08ed05a08502cd --- /dev/null +++ b/src/util/src/tcache.c @@ -0,0 +1,557 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tcache.h" +#include "hash.h" +#include "hashfunc.h" + +#include "tlog.h" +#include "ttime.h" +#include "ttimer.h" +#include "tutil.h" + +static FORCE_INLINE void __cache_wr_lock(SCacheObj *pCacheObj) { +#if defined(LINUX) + pthread_rwlock_wrlock(&pCacheObj->lock); +#else + pthread_mutex_lock(&pCacheObj->lock); +#endif +} + +static FORCE_INLINE void __cache_rd_lock(SCacheObj *pCacheObj) { +#if defined(LINUX) + pthread_rwlock_rdlock(&pCacheObj->lock); +#else + pthread_mutex_lock(&pCacheObj->lock); +#endif +} + +static FORCE_INLINE void __cache_unlock(SCacheObj *pCacheObj) { +#if defined(LINUX) + pthread_rwlock_unlock(&pCacheObj->lock); +#else + pthread_mutex_unlock(&pCacheObj->lock); +#endif +} + +static FORCE_INLINE int32_t __cache_lock_init(SCacheObj *pCacheObj) { +#if defined(LINUX) + return pthread_rwlock_init(&pCacheObj->lock, NULL); +#else + return pthread_mutex_init(&pCacheObj->lock, NULL); +#endif +} + +static FORCE_INLINE void __cache_lock_destroy(SCacheObj *pCacheObj) { +#if defined(LINUX) + pthread_rwlock_destroy(&pCacheObj->lock); +#else + pthread_mutex_destroy(&pCacheObj->lock); +#endif +} + +static FORCE_INLINE void taosFreeNode(void *data) { + SCacheDataNode *pNode = *(SCacheDataNode **)data; + free(pNode); +} + +/** + * @param key key of object for hash, usually a null-terminated string + * @param keyLen length of key + * @param pData actually data. required a consecutive memory block, no pointer is allowed + * in pData. Pointer copy causes memory access error. 
+ * @param size size of block + * @param lifespan total survial expiredTime from now + * @return SCacheDataNode + */ +static SCacheDataNode *taosCreateHashNode(const char *key, size_t keyLen, const char *pData, size_t size, + uint64_t duration) { + size_t totalSize = size + sizeof(SCacheDataNode) + keyLen; + + SCacheDataNode *pNewNode = calloc(1, totalSize); + if (pNewNode == NULL) { + pError("failed to allocate memory, reason:%s", strerror(errno)); + return NULL; + } + + memcpy(pNewNode->data, pData, size); + + pNewNode->key = (char *)pNewNode + sizeof(SCacheDataNode) + size; + pNewNode->keySize = keyLen; + + memcpy(pNewNode->key, key, keyLen); + + pNewNode->addedTime = (uint64_t)taosGetTimestampMs(); + pNewNode->expiredTime = pNewNode->addedTime + duration; + + pNewNode->signature = (uint64_t)pNewNode; + pNewNode->size = (uint32_t)totalSize; + + return pNewNode; +} + +/** + * addedTime object node into trash, and this object is closed for referencing if it is addedTime to trash + * It will be removed until the pNode->refCount == 0 + * @param pCacheObj Cache object + * @param pNode Cache slot object + */ +static void taosAddToTrash(SCacheObj *pCacheObj, SCacheDataNode *pNode) { + if (pNode->inTrash) { /* node is already in trash */ + return; + } + + STrashElem *pElem = calloc(1, sizeof(STrashElem)); + pElem->pData = pNode; + + pElem->next = pCacheObj->pTrash; + if (pCacheObj->pTrash) { + pCacheObj->pTrash->prev = pElem; + } + + pElem->prev = NULL; + pCacheObj->pTrash = pElem; + + pNode->inTrash = true; + pCacheObj->numOfElemsInTrash++; + + pTrace("key:%s %p move to trash, numOfElem in trash:%d", pNode->key, pNode, pCacheObj->numOfElemsInTrash); +} + +static void taosRemoveFromTrash(SCacheObj *pCacheObj, STrashElem *pElem) { + if (pElem->pData->signature != (uint64_t)pElem->pData) { + pError("key:sig:%d %p data has been released, ignore", pElem->pData->signature, pElem->pData); + return; + } + + pCacheObj->numOfElemsInTrash--; + if (pElem->prev) { + 
pElem->prev->next = pElem->next; + } else { /* pnode is the header, update header */ + pCacheObj->pTrash = pElem->next; + } + + if (pElem->next) { + pElem->next->prev = pElem->prev; + } + + pElem->pData->signature = 0; + free(pElem->pData); + free(pElem); +} +/** + * remove nodes in trash with refCount == 0 in cache + * @param pNode + * @param pCacheObj + * @param force force model, if true, remove data in trash without check refcount. + * may cause corruption. So, forece model only applys before cache is closed + */ +static void taosTrashEmpty(SCacheObj *pCacheObj, bool force) { + __cache_wr_lock(pCacheObj); + + if (pCacheObj->numOfElemsInTrash == 0) { + if (pCacheObj->pTrash != NULL) { + pError("key:inconsistency data in cache, numOfElem in trash:%d", pCacheObj->numOfElemsInTrash); + } + pCacheObj->pTrash = NULL; + + __cache_unlock(pCacheObj); + return; + } + + STrashElem *pElem = pCacheObj->pTrash; + + while (pElem) { + T_REF_VAL_CHECK(pElem->pData); + if (pElem->next == pElem) { + pElem->next = NULL; + } + + if (force || (T_REF_VAL_GET(pElem->pData) == 0)) { + pTrace("key:%s %p removed from trash. 
numOfElem in trash:%d", pElem->pData->key, pElem->pData, + pCacheObj->numOfElemsInTrash - 1); + STrashElem *p = pElem; + + pElem = pElem->next; + taosRemoveFromTrash(pCacheObj, p); + } else { + pElem = pElem->next; + } + } + + assert(pCacheObj->numOfElemsInTrash >= 0); + __cache_unlock(pCacheObj); +} + +/** + * release node + * @param pCacheObj cache object + * @param pNode data node + */ +static FORCE_INLINE void taosCacheReleaseNode(SCacheObj *pCacheObj, SCacheDataNode *pNode) { + if (pNode->signature != (uint64_t)pNode) { + pError("key:%s, %p data is invalid, or has been released", pNode->key, pNode); + return; + } + + int32_t size = pNode->size; + taosHashRemove(pCacheObj->pHashTable, pNode->key, pNode->keySize); + + pTrace("key:%s is removed from cache,total:%d,size:%ldbytes", pNode->key, pCacheObj->totalSize, size); + free(pNode); +} + +/** + * move the old node into trash + * @param pCacheObj + * @param pNode + */ +static FORCE_INLINE void taosCacheMoveToTrash(SCacheObj *pCacheObj, SCacheDataNode *pNode) { + taosHashRemove(pCacheObj->pHashTable, pNode->key, pNode->keySize); + taosAddToTrash(pCacheObj, pNode); +} + +/** + * update data in cache + * @param pCacheObj + * @param pNode + * @param key + * @param keyLen + * @param pData + * @param dataSize + * @return + */ +static SCacheDataNode *taosUpdateCacheImpl(SCacheObj *pCacheObj, SCacheDataNode *pNode, char *key, int32_t keyLen, + void *pData, uint32_t dataSize, uint64_t duration) { + SCacheDataNode *pNewNode = NULL; + + // only a node is not referenced by any other object, in-place update it + if (T_REF_VAL_GET(pNode) == 0) { + size_t newSize = sizeof(SCacheDataNode) + dataSize + keyLen; + + pNewNode = (SCacheDataNode *)realloc(pNode, newSize); + if (pNewNode == NULL) { + return NULL; + } + + pNewNode->signature = (uint64_t)pNewNode; + memcpy(pNewNode->data, pData, dataSize); + + pNewNode->key = (char *)pNewNode + sizeof(SCacheDataNode) + dataSize; + pNewNode->keySize = keyLen; + memcpy(pNewNode->key, key, 
keyLen); + + // update the timestamp information for updated key/value + pNewNode->addedTime = taosGetTimestampMs(); + pNewNode->expiredTime = pNewNode->addedTime + duration; + + T_REF_INC(pNewNode); + + // the address of this node may be changed, so the prev and next element should update the corresponding pointer + taosHashPut(pCacheObj->pHashTable, key, keyLen, &pNewNode, sizeof(void *)); + } else { + taosCacheMoveToTrash(pCacheObj, pNode); + + pNewNode = taosCreateHashNode(key, keyLen, pData, dataSize, duration); + if (pNewNode == NULL) { + return NULL; + } + + T_REF_INC(pNewNode); + + // addedTime new element to hashtable + taosHashPut(pCacheObj->pHashTable, key, keyLen, &pNewNode, sizeof(void *)); + } + + return pNewNode; +} + +/** + * addedTime data into hash table + * @param key + * @param pData + * @param size + * @param pCacheObj + * @param keyLen + * @param pNode + * @return + */ +static FORCE_INLINE SCacheDataNode *taosAddToCacheImpl(SCacheObj *pCacheObj, char *key, size_t keyLen, const void *pData, + size_t dataSize, uint64_t duration) { + SCacheDataNode *pNode = taosCreateHashNode(key, keyLen, pData, dataSize, duration); + if (pNode == NULL) { + return NULL; + } + + T_REF_INC(pNode); + taosHashPut(pCacheObj->pHashTable, key, keyLen, &pNode, sizeof(void *)); + return pNode; +} + +static void doCleanupDataCache(SCacheObj *pCacheObj) { + __cache_wr_lock(pCacheObj); + + if (taosHashGetSize(pCacheObj->pHashTable) > 0) { + taosHashCleanup(pCacheObj->pHashTable); + } + + __cache_unlock(pCacheObj); + + taosTrashEmpty(pCacheObj, true); + __cache_lock_destroy(pCacheObj); + + memset(pCacheObj, 0, sizeof(SCacheObj)); + free(pCacheObj); +} + +/** + * refresh cache to remove data in both hash list and trash, if any nodes' refcount == 0, every pCacheObj->refreshTime + * @param handle Cache object handle + */ +static void taosCacheRefresh(void *handle, void *tmrId) { + SCacheObj *pCacheObj = (SCacheObj *)handle; + + if (pCacheObj == NULL || 
taosHashGetSize(pCacheObj->pHashTable) == 0) { + pTrace("object is destroyed. no refresh retry"); + return; + } + + if (pCacheObj->deleting == 1) { + doCleanupDataCache(pCacheObj); + return; + } + + uint64_t expiredTime = taosGetTimestampMs(); + pCacheObj->statistics.refreshCount++; + + SHashMutableIterator *pIter = taosHashCreateIter(pCacheObj->pHashTable); + + __cache_wr_lock(pCacheObj); + while (taosHashIterNext(pIter)) { + if (pCacheObj->deleting == 1) { + taosHashDestroyIter(pIter); + break; + } + + SCacheDataNode *pNode = *(SCacheDataNode **)taosHashIterGet(pIter); + if (pNode->expiredTime <= expiredTime && T_REF_VAL_GET(pNode) <= 0) { + taosCacheReleaseNode(pCacheObj, pNode); + } + } + + __cache_unlock(pCacheObj); + + taosHashDestroyIter(pIter); + + if (pCacheObj->deleting == 1) { // clean up resources and abort + doCleanupDataCache(pCacheObj); + } else { + taosTrashEmpty(pCacheObj, false); + taosTmrReset(taosCacheRefresh, pCacheObj->refreshTime, pCacheObj, pCacheObj->tmrCtrl, &pCacheObj->pTimer); + } +} + +SCacheObj *taosCacheInit(void *tmrCtrl, int64_t refreshTime) { + if (tmrCtrl == NULL || refreshTime <= 0) { + return NULL; + } + + SCacheObj *pCacheObj = (SCacheObj *)calloc(1, sizeof(SCacheObj)); + if (pCacheObj == NULL) { + pError("failed to allocate memory, reason:%s", strerror(errno)); + return NULL; + } + + pCacheObj->pHashTable = taosHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false); + if (pCacheObj->pHashTable == NULL) { + free(pCacheObj); + pError("failed to allocate memory, reason:%s", strerror(errno)); + return NULL; + } + + // set free cache node callback function for hash table + taosHashSetFreecb(pCacheObj->pHashTable, taosFreeNode); + + pCacheObj->refreshTime = refreshTime * 1000; + pCacheObj->tmrCtrl = tmrCtrl; + + taosTmrReset(taosCacheRefresh, pCacheObj->refreshTime, pCacheObj, pCacheObj->tmrCtrl, &pCacheObj->pTimer); + + if (__cache_lock_init(pCacheObj) != 0) { + taosTmrStopA(&pCacheObj->pTimer); + 
taosHashCleanup(pCacheObj->pHashTable); + free(pCacheObj); + + pError("failed to init lock, reason:%s", strerror(errno)); + return NULL; + } + + return pCacheObj; +} + +void *taosCachePut(SCacheObj *pCacheObj, char *key, void *pData, size_t dataSize, int duration) { + SCacheDataNode *pNode; + + if (pCacheObj == NULL || pCacheObj->pHashTable == NULL) { + return NULL; + } + + size_t keyLen = strlen(key); + + __cache_wr_lock(pCacheObj); + SCacheDataNode **pt = (SCacheDataNode **)taosHashGet(pCacheObj->pHashTable, key, keyLen); + SCacheDataNode * pOld = (pt != NULL) ? (*pt) : NULL; + + if (pOld == NULL) { // do addedTime to cache + pNode = taosAddToCacheImpl(pCacheObj, key, keyLen, pData, dataSize, duration * 1000L); + if (NULL != pNode) { + pCacheObj->totalSize += pNode->size; + + pTrace("key:%s %p added into cache, added:%" PRIu64 ", expire:%" PRIu64 ", total:%d, size:%" PRId64 " bytes", + key, pNode, pNode->addedTime, pNode->expiredTime, pCacheObj->totalSize, dataSize); + } + } else { // old data exists, update the node + pNode = taosUpdateCacheImpl(pCacheObj, pOld, key, keyLen, pData, dataSize, duration * 1000L); + pTrace("key:%s %p exist in cache, updated", key, pNode); + } + + __cache_unlock(pCacheObj); + + return (pNode != NULL) ? 
pNode->data : NULL; +} + +void *taosCacheAcquireByName(SCacheObj *pCacheObj, const char *key) { + if (pCacheObj == NULL || taosHashGetSize(pCacheObj->pHashTable) == 0) { + return NULL; + } + + uint32_t keyLen = (uint32_t)strlen(key); + + __cache_rd_lock(pCacheObj); + + SCacheDataNode **ptNode = (SCacheDataNode **)taosHashGet(pCacheObj->pHashTable, key, keyLen); + if (ptNode != NULL) { + T_REF_INC(*ptNode); + } + + __cache_unlock(pCacheObj); + + if (ptNode != NULL) { + atomic_add_fetch_32(&pCacheObj->statistics.hitCount, 1); + pTrace("key:%s is retrieved from cache,refcnt:%d", key, T_REF_VAL_GET(*ptNode)); + } else { + atomic_add_fetch_32(&pCacheObj->statistics.missCount, 1); + pTrace("key:%s not in cache,retrieved failed", key); + } + + atomic_add_fetch_32(&pCacheObj->statistics.totalAccess, 1); + return (ptNode != NULL) ? (*ptNode)->data : NULL; +} + +void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data) { + if (pCacheObj == NULL || data == NULL) return NULL; + + size_t offset = offsetof(SCacheDataNode, data); + SCacheDataNode *ptNode = (SCacheDataNode *)((char *)data - offset); + + if (ptNode->signature != (uint64_t)ptNode) { + pError("key: %p the data from cache is invalid", ptNode); + return NULL; + } + + int32_t ref = T_REF_INC(ptNode); + pTrace("%p addedTime ref data in cache, refCnt:%d", data, ref) + + // the data if referenced by at least one object, so the reference count must be greater than the value of 2. 
+ assert(ref >= 2); + return data; +} + +void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) { + if (pCacheObj == NULL || data == NULL) return NULL; + + size_t offset = offsetof(SCacheDataNode, data); + SCacheDataNode *ptNode = (SCacheDataNode *)((char *)(*data) - offset); + + if (ptNode->signature != (uint64_t)ptNode) { + pError("key: %p the data from cache is invalid", ptNode); + return NULL; + } + + assert(T_REF_VAL_GET(ptNode) >= 1); + + char *d = *data; + + // clear its reference to old area + *data = NULL; + + return d; +} + +void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { + if (pCacheObj == NULL || (*data) == NULL || (taosHashGetSize(pCacheObj->pHashTable) + pCacheObj->numOfElemsInTrash == 0)) { + return; + } + + size_t offset = offsetof(SCacheDataNode, data); + + SCacheDataNode *pNode = (SCacheDataNode *)((char *)(*data) - offset); + + if (pNode->signature != (uint64_t)pNode) { + pError("key: %p release invalid cache data", pNode); + return; + } + + *data = NULL; + + if (_remove) { + __cache_wr_lock(pCacheObj); + // pNode may be released immediately by other thread after the reference count of pNode is set to 0, + // So we need to lock it in the first place. 
+ T_REF_DEC(pNode); + taosCacheMoveToTrash(pCacheObj, pNode); + + __cache_unlock(pCacheObj); + } else { + T_REF_DEC(pNode); + } +} + +void taosCacheEmpty(SCacheObj *pCacheObj) { + SHashMutableIterator *pIter = taosHashCreateIter(pCacheObj->pHashTable); + + __cache_wr_lock(pCacheObj); + while (taosHashIterNext(pIter)) { + if (pCacheObj->deleting == 1) { + taosHashDestroyIter(pIter); + break; + } + + SCacheDataNode *pNode = *(SCacheDataNode **)taosHashIterGet(pIter); + taosCacheMoveToTrash(pCacheObj, pNode); + } + __cache_unlock(pCacheObj); + + taosHashDestroyIter(pIter); + taosTrashEmpty(pCacheObj, false); +} + +void taosCacheCleanup(SCacheObj *pCacheObj) { + if (pCacheObj == NULL) { + return; + } + + pCacheObj->deleting = 1; +} diff --git a/src/util/src/tglobalcfg.c b/src/util/src/tglobalcfg.c index a49873d30a83c02c2faa4bebbca36190d31a389f..bbea30207cfbb275caf37673afd925bea63559d7 100644 --- a/src/util/src/tglobalcfg.c +++ b/src/util/src/tglobalcfg.c @@ -61,9 +61,10 @@ int64_t tsMsPerDay[] = {86400000L, 86400000000L}; char tsMasterIp[TSDB_IPv4ADDR_LEN] = {0}; char tsSecondIp[TSDB_IPv4ADDR_LEN] = {0}; -uint16_t tsMgmtShellPort = 6030; // udp[6030-6034] tcp[6030] -uint16_t tsVnodeShellPort = 6035; // udp[6035-6039] tcp[6035] -uint16_t tsMgmtDnodePort = 6040; // udp[6040-6044] tcp[6040] +uint16_t tsMnodeShellPort = 6030; // udp[6030-6034] tcp[6030] +uint16_t tsDnodeShellPort = 6035; // udp[6035-6039] tcp[6035] +uint16_t tsMnodeDnodePort = 6040; // udp/tcp +uint16_t tsDnodeMnodePort = 6041; // udp/tcp uint16_t tsVnodeVnodePort = 6045; // tcp[6045] uint16_t tsMgmtMgmtPort = 6050; // udp, numOfVnodes fixed to 1, range udp[6050] uint16_t tsMgmtSyncPort = 6050; // tcp, range tcp[6050] @@ -134,7 +135,7 @@ int tsOfflineThreshold = 864000; // seconds 10days int tsMgmtEqualVnodeNum = 4; int tsEnableHttpModule = 1; -int tsEnableMonitorModule = 1; +int tsEnableMonitorModule = 0; int tsRestRowLimit = 10240; int tsMaxSQLStringLen = TSDB_MAX_SQL_LEN; @@ -492,13 +493,13 @@ static 
void doInitGlobalConfig() { tsInitConfigOption(cfg++, "httpPort", &tsHttpPort, TSDB_CFG_VTYPE_SHORT, TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW, 1, 65535, 0, TSDB_CFG_UTYPE_NONE); - tsInitConfigOption(cfg++, "mgmtShellPort", &tsMgmtShellPort, TSDB_CFG_VTYPE_SHORT, + tsInitConfigOption(cfg++, "mgmtShellPort", &tsMnodeShellPort, TSDB_CFG_VTYPE_SHORT, TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW | TSDB_CFG_CTYPE_B_CLIENT, 1, 65535, 0, TSDB_CFG_UTYPE_NONE); - tsInitConfigOption(cfg++, "vnodeShellPort", &tsVnodeShellPort, TSDB_CFG_VTYPE_SHORT, + tsInitConfigOption(cfg++, "vnodeShellPort", &tsDnodeShellPort, TSDB_CFG_VTYPE_SHORT, TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW | TSDB_CFG_CTYPE_B_CLIENT, 1, 65535, 0, TSDB_CFG_UTYPE_NONE); - tsInitConfigOption(cfg++, "mgmtVnodePort", &tsMgmtDnodePort, TSDB_CFG_VTYPE_SHORT, + tsInitConfigOption(cfg++, "mgmtVnodePort", &tsMnodeDnodePort, TSDB_CFG_VTYPE_SHORT, TSDB_CFG_CTYPE_B_CONFIG | TSDB_CFG_CTYPE_B_SHOW | TSDB_CFG_CTYPE_B_CLUSTER, 1, 65535, 0, TSDB_CFG_UTYPE_NONE); tsInitConfigOption(cfg++, "vnodeVnodePort", &tsVnodeVnodePort, TSDB_CFG_VTYPE_SHORT, diff --git a/src/util/src/thashutil.c b/src/util/src/thashutil.c index cf16efe2f8e539f9611952111bafc5d4ff214d3e..f4b4e9faa262c8f5968cb546138feacce4895c38 100644 --- a/src/util/src/thashutil.c +++ b/src/util/src/thashutil.c @@ -7,8 +7,8 @@ * MurmurHash algorithm * */ +#include "hashfunc.h" #include "tutil.h" -#include "hashutil.h" #define ROTL32(x, r) ((x) << (r) | (x) >> (32 - (r))) diff --git a/src/util/src/tlist.c b/src/util/src/tlist.c new file mode 100644 index 0000000000000000000000000000000000000000..badcb7802f510b2978abace6b21a1098e1cdc44d --- /dev/null +++ b/src/util/src/tlist.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +#include +#include + +#include "tlist.h" + +SList *tdListNew(int eleSize) { + SList *list = (SList *)malloc(sizeof(SList)); + if (list == NULL) return NULL; + + list->eleSize = eleSize; + list->numOfEles = 0; + list->head = list->tail = NULL; + return list; +} + +void tdListEmpty(SList *list) { + SListNode *node = list->head; + while (node) { + list->head = node->next; + free(node); + node = list->head; + } + list->head = list->tail = 0; + list->numOfEles = 0; +} + +void tdListFree(SList *list) { + tdListEmpty(list); + free(list); +} + +void tdListPrependNode(SList *list, SListNode *node) { + if (list->head == NULL) { + list->head = node; + list->tail = node; + } else { + node->next = list->head; + node->prev = NULL; + list->head->prev = node; + list->head = node; + } + list->numOfEles++; +} + +void tdListAppendNode(SList *list, SListNode *node) { + if (list->head == NULL) { + list->head = node; + list->tail = node; + } else { + node->prev = list->tail; + node->next = NULL; + list->tail->next = node; + list->tail = node; + } + + list->numOfEles++; +} + +int tdListPrepend(SList *list, void *data) { + SListNode *node = (SListNode *)malloc(sizeof(SListNode) + list->eleSize); + if (node == NULL) return -1; + + memcpy((void *)(node->data), data, list->eleSize); + tdListPrependNode(list, node); + + return 0; +} + +int tdListAppend(SList *list, void *data) { + SListNode *node = (SListNode *)malloc(sizeof(SListNode) + list->eleSize); + if (node == NULL) return -1; + + memcpy((void *)(node->data), data, list->eleSize); + tdListAppendNode(list, node); + + return 0; +} + +SListNode *tdListPopHead(SList *list) { + if (list->head == NULL) return NULL; + 
SListNode *node = list->head; + if (node->next == NULL) { + list->head = NULL; + list->tail = NULL; + } else { + list->head = node->next; + } + list->numOfEles--; + return node; +} + +SListNode *tdListPopTail(SList *list) { + if (list->tail == NULL) return NULL; + SListNode *node = list->tail; + if (node->prev == NULL) { + list->head = NULL; + list->tail = NULL; + } else { + list->tail = node->prev; + } + list->numOfEles--; + return node; +} + +SListNode *tdListPopNode(SList *list, SListNode *node) { + if (list->head == node) { + list->head = node->next; + } + if (list->tail == node) { + list->tail = node->prev; + } + + if (node->prev != NULL) { + node->prev->next = node->next; + } + if (node->next != NULL) { + node->next->prev = node->prev; + } + list->numOfEles--; + + return node; +} + +// Move all node elements from src to dst, the dst is assumed as an empty list +void tdListMove(SList *src, SList *dst) { + // assert(dst->eleSize == src->eleSize); + dst->numOfEles = src->numOfEles; + dst->head = src->head; + dst->tail = src->tail; + src->numOfEles = 0; + src->head = src->tail = NULL; +} + +void tdListNodeGetData(SList *list, SListNode *node, void *target) { memcpy(target, node->data, list->eleSize); } + +void tdListInitIter(SList *list, SListIter *pIter, TD_LIST_DIRECTION_T direction) { + pIter->direction = direction; + if (direction == TD_LIST_FORWARD) { + pIter->next = list->head; + } else { + pIter->next = list->tail; + } +} + +SListNode *tdListNext(SListIter *pIter) { + SListNode *node = pIter->next; + if (node == NULL) return NULL; + if (pIter->direction == TD_LIST_FORWARD) { + pIter->next = node->next; + } else { + pIter->next = node->prev; + } + + return node; +} \ No newline at end of file diff --git a/src/util/src/tskiplist.c b/src/util/src/tskiplist.c index c4f0ccab03359193516029a896dd7f0d0f7ab6c3..85b1d3d206f07647c2fd68c9e61e7f8d7367f290 100644 --- a/src/util/src/tskiplist.c +++ b/src/util/src/tskiplist.c @@ -253,6 +253,7 @@ static __compar_fn_t 
getKeyComparator(int32_t keyType) { comparFn = compareInt32Val; break; case TSDB_DATA_TYPE_BIGINT: + case TSDB_DATA_TYPE_TIMESTAMP: comparFn = compareInt64Val; break; case TSDB_DATA_TYPE_BOOL: @@ -524,6 +525,71 @@ SArray* tSkipListGet(SSkipList *pSkipList, SSkipListKey pKey, int16_t keyType) { return sa; } +size_t tSkipListGetSize(const SSkipList* pSkipList) { + if (pSkipList == NULL) { + return 0; + } + + return pSkipList->size; +} + +SSkipListIterator* tSkipListCreateIter(SSkipList *pSkipList) { + if (pSkipList == NULL) { + return NULL; + } + + SSkipListIterator* iter = calloc(1, sizeof(SSkipListIterator)); + + iter->pSkipList = pSkipList; + if (pSkipList->lock) { + pthread_rwlock_rdlock(pSkipList->lock); + } + + iter->cur = NULL; + iter->num = pSkipList->size; + + if (pSkipList->lock) { + pthread_rwlock_unlock(pSkipList->lock); + } + + return iter; +} + +bool tSkipListIterNext(SSkipListIterator *iter) { + if (iter->num == 0 || iter->pSkipList == NULL) { + return false; + } + + SSkipList *pSkipList = iter->pSkipList; + + if (pSkipList->lock) { + pthread_rwlock_rdlock(pSkipList->lock); + } + + if (iter->cur == NULL) { + iter->cur = SL_GET_FORWARD_POINTER(pSkipList->pHead, 0); + } else { + iter->cur = SL_GET_FORWARD_POINTER(iter->cur, 0); + } + + if (pSkipList->lock) { + pthread_rwlock_unlock(pSkipList->lock); + } + + return iter->cur != NULL; +} + +SSkipListNode *tSkipListIterGet(SSkipListIterator *iter) { return (iter == NULL)? 
NULL:iter->cur; } + +void* tSkipListDestroyIter(SSkipListIterator* iter) { + if (iter == NULL) { + return NULL; + } + + tfree(iter); + return NULL; +} + // static int32_t tSkipListEndParQuery(SSkipList *pSkipList, SSkipListNode *pStartNode, SSkipListKey *pEndKey, // int32_t cond, SSkipListNode ***pRes) { // pthread_rwlock_rdlock(&pSkipList->lock); diff --git a/src/util/src/tstatus.c b/src/util/src/tstatus.c index ea8fb630c8b97efe43592beed267d8991db3103e..39704464e800db3be75c623eaac437e9a3fe2214 100644 --- a/src/util/src/tstatus.c +++ b/src/util/src/tstatus.c @@ -16,20 +16,20 @@ #include "taosmsg.h" #include "tstatus.h" -const char* taosGetVgroupStatusStr(int32_t vgroupStatus) { +char* taosGetVgroupStatusStr(int32_t vgroupStatus) { switch (vgroupStatus) { - case TSDB_VG_STATUS_READY: return tstrerror(vgroupStatus); - case TSDB_VG_STATUS_IN_PROGRESS: return tstrerror(vgroupStatus); - case TSDB_VG_STATUS_NO_DISK_PERMISSIONS: return tstrerror(vgroupStatus); - case TSDB_VG_STATUS_SERVER_NO_PACE: return tstrerror(vgroupStatus); - case TSDB_VG_STATUS_SERV_OUT_OF_MEMORY: return tstrerror(vgroupStatus); - case TSDB_VG_STATUS_INIT_FAILED: return tstrerror(vgroupStatus); - case TSDB_VG_STATUS_FULL: return tstrerror(vgroupStatus); + case TSDB_VG_STATUS_READY: return (char*)tstrerror(vgroupStatus); + case TSDB_VG_STATUS_IN_PROGRESS: return (char*)tstrerror(vgroupStatus); + case TSDB_VG_STATUS_NO_DISK_PERMISSIONS: return (char*)tstrerror(vgroupStatus); + case TSDB_VG_STATUS_SERVER_NO_PACE: return (char*)tstrerror(vgroupStatus); + case TSDB_VG_STATUS_SERV_OUT_OF_MEMORY: return (char*)tstrerror(vgroupStatus); + case TSDB_VG_STATUS_INIT_FAILED: return (char*)tstrerror(vgroupStatus); + case TSDB_VG_STATUS_FULL: return (char*)tstrerror(vgroupStatus); default: return "undefined"; } } -const char* taosGetDbStatusStr(int32_t dbStatus) { +char* taosGetDbStatusStr(int32_t dbStatus) { switch (dbStatus) { case TSDB_DB_STATUS_READY: return "ready"; case TSDB_DB_STATUS_DROPPING: return 
"dropping"; @@ -38,7 +38,7 @@ const char* taosGetDbStatusStr(int32_t dbStatus) { } } -const char* taosGetVnodeStatusStr(int32_t vnodeStatus) { +char* taosGetVnodeStatusStr(int32_t vnodeStatus) { switch (vnodeStatus) { case TSDB_VN_STATUS_OFFLINE: return "offline"; case TSDB_VN_STATUS_CREATING: return "creating"; @@ -51,7 +51,7 @@ const char* taosGetVnodeStatusStr(int32_t vnodeStatus) { } } -const char* taosGetVnodeSyncStatusStr(int32_t vnodeSyncStatus) { +char* taosGetVnodeSyncStatusStr(int32_t vnodeSyncStatus) { switch (vnodeSyncStatus) { case TSDB_VN_SYNC_STATUS_INIT: return "ready"; case TSDB_VN_SYNC_STATUS_SYNCING: return "syncing"; @@ -61,7 +61,7 @@ const char* taosGetVnodeSyncStatusStr(int32_t vnodeSyncStatus) { } } -const char* taosGetVnodeDropStatusStr(int32_t dropping) { +char* taosGetVnodeDropStatusStr(int32_t dropping) { switch (dropping) { case TSDB_VN_DROP_STATUS_READY: return "ready"; case TSDB_VN_DROP_STATUS_DROPPING: return "dropping"; @@ -69,7 +69,7 @@ const char* taosGetVnodeDropStatusStr(int32_t dropping) { } } -const char* taosGetDnodeStatusStr(int32_t dnodeStatus) { +char* taosGetDnodeStatusStr(int32_t dnodeStatus) { switch (dnodeStatus) { case TSDB_DN_STATUS_OFFLINE: return "offline"; case TSDB_DN_STATUS_READY: return "ready"; @@ -77,7 +77,7 @@ const char* taosGetDnodeStatusStr(int32_t dnodeStatus) { } } -const char* taosGetDnodeLbStatusStr(int32_t dnodeBalanceStatus) { +char* taosGetDnodeLbStatusStr(int32_t dnodeBalanceStatus) { switch (dnodeBalanceStatus) { case TSDB_DN_LB_STATUS_BALANCED: return "balanced"; case TSDB_DN_LB_STATUS_BALANCING: return "balancing"; @@ -87,7 +87,7 @@ const char* taosGetDnodeLbStatusStr(int32_t dnodeBalanceStatus) { } } -const char* taosGetVgroupLbStatusStr(int32_t vglbStatus) { +char* taosGetVgroupLbStatusStr(int32_t vglbStatus) { switch (vglbStatus) { case TSDB_VG_LB_STATUS_READY: return "ready"; case TSDB_VG_LB_STATUS_UPDATE: return "updating"; @@ -95,7 +95,7 @@ const char* taosGetVgroupLbStatusStr(int32_t 
vglbStatus) { } } -const char* taosGetVnodeStreamStatusStr(int32_t vnodeStreamStatus) { +char* taosGetVnodeStreamStatusStr(int32_t vnodeStreamStatus) { switch (vnodeStreamStatus) { case TSDB_VN_STREAM_STATUS_START: return "start"; case TSDB_VN_STREAM_STATUS_STOP: return "stop"; @@ -103,9 +103,9 @@ const char* taosGetVnodeStreamStatusStr(int32_t vnodeStreamStatus) { } } -const char* taosGetTableStatusStr(int32_t tableStatus) { +char* taosGetTableStatusStr(int32_t tableStatus) { switch(tableStatus) { - case TSDB_METER_STATE_INSERTING: return "inserting"; + case TSDB_METER_STATE_INSERTING:return "inserting"; case TSDB_METER_STATE_IMPORTING:return "importing"; case TSDB_METER_STATE_UPDATING: return "updating"; case TSDB_METER_STATE_DROPPING: return "deleting"; @@ -114,3 +114,25 @@ const char* taosGetTableStatusStr(int32_t tableStatus) { default:return "undefined"; } } + +char *taosGetShowTypeStr(int32_t showType) { + switch (showType) { + case TSDB_MGMT_TABLE_ACCT: return "show accounts"; + case TSDB_MGMT_TABLE_USER: return "show users"; + case TSDB_MGMT_TABLE_DB: return "show databases"; + case TSDB_MGMT_TABLE_TABLE: return "show tables"; + case TSDB_MGMT_TABLE_DNODE: return "show dnodes"; + case TSDB_MGMT_TABLE_MNODE: return "show mnodes"; + case TSDB_MGMT_TABLE_VGROUP: return "show vgroups"; + case TSDB_MGMT_TABLE_METRIC: return "show stables"; + case TSDB_MGMT_TABLE_MODULE: return "show modules"; + case TSDB_MGMT_TABLE_QUERIES: return "show queries"; + case TSDB_MGMT_TABLE_STREAMS: return "show streams"; + case TSDB_MGMT_TABLE_CONFIGS: return "show configs"; + case TSDB_MGMT_TABLE_CONNS: return "show connections"; + case TSDB_MGMT_TABLE_SCORES: return "show scores"; + case TSDB_MGMT_TABLE_GRANTS: return "show grants"; + case TSDB_MGMT_TABLE_VNODES: return "show vnodes"; + default: return "undefined"; + } +} diff --git a/src/util/src/tstring.c b/src/util/src/tstring.c index a5ab7fbf67d9124e0fa2fa85b60a2e2f70a22eb5..7aca939f478defb2a88ac23f1480c159be774bae 100644 --- 
a/src/util/src/tstring.c +++ b/src/util/src/tstring.c @@ -26,23 +26,23 @@ char *taosMsg[] = { "create-table", "create-table-rsp", //10 - "remove-table", - "remove-table-rsp", + "drop-table", + "drop-table-rsp", + "alter-table", + "alter-table-rsp", "create-vnode", "create-vnode-rsp", - "free-vnode", - "free-vnode-rsp", - "cfg-dnode", - "cfg-dnode-rsp", - "alter-stream", - "alter-stream-rsp", //20 + "drop-vnode", + "drop-vnode-rsp", + "alter-vnode", + "alter-vnode-rsp", //20 - "sync", - "sync-rsp", - "forward", - "forward-rsp", "drop-stable", "drop-stable-rsp", + "alter-stream", + "alter-stream-rsp", + "config-dnode", + "config-dnode-rsp", "", "", "", @@ -63,37 +63,26 @@ char *taosMsg[] = { "alter-user-rsp", "drop-user", "drop-user-rsp", - "create-mnode", - "create-mnode-rsp", - "drop-mnode", - "drop-mnode-rsp", "create-dnode", - "create-dnode-rsp", //50 - + "create-dnode-rsp", "drop-dnode", "drop-dnode-rsp", - "alter-dnode", - "alter-dnode-rsp", "create-db", - "create-db-rsp", + "create-db-rsp", //50 + "drop-db", "drop-db-rsp", "use-db", - "use-db-rsp", //60 - + "use-db-rsp", "alter-db", "alter-db-rsp", "create-table", "create-table-rsp", "drop-table", - "drop-table-rsp", + "drop-table-rsp", //60 + "alter-table", "alter-table-rsp", - "cfg-vnode", - "cfg-vnode-rsp", //70 - - "cfg-table", - "cfg-table-rsp", "table-meta", "table-meta-rsp", "super-table-meta", @@ -101,24 +90,43 @@ char *taosMsg[] = { "multi-table-meta", "multi-table-meta-rsp", "alter-stream", - "alter-stream-rsp", //80 + "alter-stream-rsp", //70 "show", "show-rsp", - "cfg-mnode", - "cfg-mnode-rsp", "kill-query", "kill-query-rsp", "kill-stream", "kill-stream-rsp", "kill-connection", - "kill-connectoin-rsp", //90 - + "kill-connectoin-rsp", "heart-beat", - "heart-beat-rsp", + "heart-beat-rsp", //80 + + "", + "", + "", + "", + "", + "", + "", + "", //90 + + "config-table", + "config-table-rsp", + "config-vnode", + "config-vnode-rsp", "status", "status-rsp", "grant", "grant-rsp", + "", + "", //100 + + 
"sdb-sync", + "sdb-sync-rsp", + "sdb-forward", + "sdb-forward-rsp", "max" -}; \ No newline at end of file +}; + diff --git a/src/util/src/tutil.c b/src/util/src/tutil.c index d5e67adff693b427633908b39e862e1993812c5f..9c384b25bace047cdd80fe2903c330dbe3ed85d7 100644 --- a/src/util/src/tutil.c +++ b/src/util/src/tutil.c @@ -27,6 +27,8 @@ #include "tlog.h" #include "taoserror.h" +int32_t tmpFileSerialNum = 0; + int32_t strdequote(char *z) { if (z == NULL) { return 0; @@ -401,6 +403,27 @@ int32_t taosFileRename(char *fullPath, char *suffix, char delimiter, char **dstP return rename(fullPath, *dstPath); } +void getTmpfilePath(const char *fileNamePrefix, char *dstPath) { + const char* tdengineTmpFileNamePrefix = "tdengine-"; + + char tmpPath[PATH_MAX] = {0}; + +#ifdef WINDOWS + char *tmpDir = getenv("tmp"); + if (tmpDir == NULL) { + tmpDir = ""; + } +#else + char *tmpDir = "/tmp/"; +#endif + + strcpy(tmpPath, tmpDir); + strcat(tmpPath, tdengineTmpFileNamePrefix); + strcat(tmpPath, fileNamePrefix); + strcat(tmpPath, "-%llu-%u"); + snprintf(dstPath, PATH_MAX, tmpPath, taosGetPthreadId(), atomic_add_fetch_32(&tmpFileSerialNum, 1)); +} + int tasoUcs4Compare(void* f1_ucs4, void *f2_ucs4, int bytes) { #if defined WINDOWS for (int i = 0; i < bytes; ++i) { diff --git a/src/vnode/detail/inc/vnodeQueryImpl.h b/src/vnode/detail/inc/vnodeQueryImpl.h index e3507d5f82e8c156ffdc5a3babae8ea5af079398..9c28af22c7818b3eee53360760c3a60b14b90b13 100644 --- a/src/vnode/detail/inc/vnodeQueryImpl.h +++ b/src/vnode/detail/inc/vnodeQueryImpl.h @@ -23,7 +23,7 @@ extern "C" { #include "os.h" #include "hash.h" -#include "hashutil.h" +#include "hashfunc.h" #define GET_QINFO_ADDR(x) ((char*)(x)-offsetof(SQInfo, query)) #define Q_STATUS_EQUAL(p, s) (((p) & (s)) != 0) @@ -119,7 +119,7 @@ typedef enum { typedef int (*__block_search_fn_t)(char* data, int num, int64_t key, int order); static FORCE_INLINE SMeterObj* getMeterObj(void* hashHandle, int32_t sid) { - return 
*(SMeterObj**)taosGetDataFromHashTable(hashHandle, (const char*)&sid, sizeof(sid)); + return *(SMeterObj**)taosHashGet(hashHandle, (const char*)&sid, sizeof(sid)); } bool isQueryKilled(SQuery* pQuery); diff --git a/src/vnode/detail/inc/vnodeRead.h b/src/vnode/detail/inc/vnodeRead.h index 2758cfe1d9610257c7ddf0658874a7ee57511fc0..4e6e04208d7fd08d4af0e59337683d157c068114 100644 --- a/src/vnode/detail/inc/vnodeRead.h +++ b/src/vnode/detail/inc/vnodeRead.h @@ -21,9 +21,9 @@ extern "C" { #endif #include "os.h" -#include "tresultBuf.h" +#include "qresultBuf.h" -#include "tinterpolation.h" +#include "qinterpolation.h" #include "vnodeTagMgmt.h" /* @@ -170,7 +170,7 @@ typedef struct SQueryRuntimeEnv { STSCursor cur; SQueryCostSummary summary; bool stableQuery; // is super table query or not - SQueryDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file + SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file /* * Temporarily hold the in-memory cache block info during scan cache blocks diff --git a/src/vnode/detail/inc/vnodeSupertableQuery.h b/src/vnode/detail/inc/vnodeSupertableQuery.h index bc7fa1e81b03d65c34dda30ea35d352867eaec75..cc2d21871c63510fb45bdf54db15944da593e349 100644 --- a/src/vnode/detail/inc/vnodeSupertableQuery.h +++ b/src/vnode/detail/inc/vnodeSupertableQuery.h @@ -16,11 +16,9 @@ #ifndef TBASE_MNODE_SUPER_TABLE_QUERY_H #define TBASE_MNODE_SUPER_TABLE_QUERY_H -#include -#include -#include +#include "os.h" #include "mnode.h" -#include "tast.h" +#include "qast.h" int32_t mgmtDoJoin(SSuperTableMetaMsg* pSuperTableMetaMsg, tQueryResultset* pRes); void mgmtReorganizeMetersInMetricMeta(SSuperTableMetaMsg* pInfo, int32_t index, tQueryResultset* pRes); diff --git a/src/vnode/detail/src/vnodeQueryImpl.c b/src/vnode/detail/src/vnodeQueryImpl.c index f761205719c8020b871448942461b2df2dc265d9..9eb3fb8b65f8f6299729b77edf4c14776f0d99f9 100644 --- a/src/vnode/detail/src/vnodeQueryImpl.c +++ 
b/src/vnode/detail/src/vnodeQueryImpl.c @@ -14,13 +14,13 @@ */ #include "hash.h" -#include "hashutil.h" +#include "hashfunc.h" #include "os.h" +#include "qextbuffer.h" #include "taosmsg.h" -#include "textbuffer.h" #include "ttime.h" -#include "tinterpolation.h" +#include "qinterpolation.h" #include "tscJoinProcess.h" #include "tscSecondaryMerge.h" #include "tscompression.h" @@ -1460,7 +1460,7 @@ static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWin int16_t bytes) { SQuery *pQuery = pRuntimeEnv->pQuery; - int32_t *p1 = (int32_t *)taosGetDataFromHashTable(pWindowResInfo->hashList, pData, bytes); + int32_t *p1 = (int32_t *)taosHashGet(pWindowResInfo->hashList, pData, bytes); if (p1 != NULL) { pWindowResInfo->curIndex = *p1; } else { // more than the capacity, reallocate the resources @@ -1485,7 +1485,7 @@ static SWindowResult *doSetTimeWindowFromKey(SQueryRuntimeEnv *pRuntimeEnv, SWin // add a new result set for a new group pWindowResInfo->curIndex = pWindowResInfo->size++; - taosAddToHashTable(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t)); + taosHashPut(pWindowResInfo->hashList, pData, bytes, (char *)&pWindowResInfo->curIndex, sizeof(int32_t)); } return getWindowResult(pWindowResInfo, pWindowResInfo->curIndex); @@ -1532,7 +1532,7 @@ static STimeWindow getActiveTimeWindow(SWindowResInfo *pWindowResInfo, int64_t t return w; } -static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SQueryDiskbasedResultBuf *pResultBuf, int32_t sid, +static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResultBuf *pResultBuf, int32_t sid, int32_t numOfRowsPerPage) { if (pWindowRes->pos.pageId != -1) { return 0; @@ -1574,7 +1574,7 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SQueryDiskbasedR static int32_t setWindowOutputBufByKey(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowResInfo, int32_t sid, STimeWindow *win) { assert(win->skey <= win->ekey); - 
SQueryDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, pWindowResInfo, (char *)&win->skey, TSDB_KEYSIZE); if (pWindowRes == NULL) { @@ -2018,7 +2018,7 @@ int32_t initWindowResInfo(SWindowResInfo *pWindowResInfo, SQueryRuntimeEnv *pRun pWindowResInfo->type = type; _hash_fn_t fn = taosGetDefaultHashFunction(type); - pWindowResInfo->hashList = taosInitHashTable(threshold, fn, false); + pWindowResInfo->hashList = taosHashInit(threshold, fn, false); pWindowResInfo->curIndex = -1; pWindowResInfo->size = 0; @@ -2044,7 +2044,7 @@ void cleanupTimeWindowInfo(SWindowResInfo *pWindowResInfo, SQueryRuntimeEnv *pRu destroyTimeWindowRes(pResult, pRuntimeEnv->pQuery->numOfOutputCols); } - taosCleanUpHashTable(pWindowResInfo->hashList); + taosHashCleanup(pWindowResInfo->hashList); tfree(pWindowResInfo->pResult); } @@ -2059,11 +2059,11 @@ void resetTimeWindowInfo(SQueryRuntimeEnv *pRuntimeEnv, SWindowResInfo *pWindowR } pWindowResInfo->curIndex = -1; - taosCleanUpHashTable(pWindowResInfo->hashList); + taosHashCleanup(pWindowResInfo->hashList); pWindowResInfo->size = 0; _hash_fn_t fn = taosGetDefaultHashFunction(pWindowResInfo->type); - pWindowResInfo->hashList = taosInitHashTable(pWindowResInfo->capacity, fn, false); + pWindowResInfo->hashList = taosHashInit(pWindowResInfo->capacity, fn, false); pWindowResInfo->startTime = 0; pWindowResInfo->prevSKey = 0; @@ -2081,7 +2081,7 @@ void clearFirstNTimeWindow(SQueryRuntimeEnv *pRuntimeEnv, int32_t num) { for (int32_t i = 0; i < num; ++i) { SWindowResult *pResult = &pWindowResInfo->pResult[i]; if (pResult->status.closed) { // remove the window slot from hash table - taosDeleteFromHashTable(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); + taosHashRemove(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); } else { break; } @@ -2104,14 +2104,14 @@ void 
clearFirstNTimeWindow(SQueryRuntimeEnv *pRuntimeEnv, int32_t num) { for (int32_t k = 0; k < pWindowResInfo->size; ++k) { SWindowResult *pResult = &pWindowResInfo->pResult[k]; - int32_t *p = (int32_t *)taosGetDataFromHashTable(pWindowResInfo->hashList, (const char *)&pResult->window.skey, + int32_t *p = (int32_t *)taosHashGet(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); int32_t v = (*p - num); assert(v >= 0 && v <= pWindowResInfo->size); // todo add the update function for hash table - taosDeleteFromHashTable(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); - taosAddToHashTable(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE, (char *)&v, + taosHashRemove(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE); + taosHashPut(pWindowResInfo->hashList, (const char *)&pResult->window.skey, TSDB_KEYSIZE, (char *)&v, sizeof(int32_t)); } @@ -2156,7 +2156,7 @@ static int32_t setGroupResultOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, char *pDat int32_t GROUPRESULTID = 1; - SQueryDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; SWindowResult *pWindowRes = doSetTimeWindowFromKey(pRuntimeEnv, &pRuntimeEnv->windowResInfo, pData, bytes); if (pWindowRes == NULL) { @@ -4812,7 +4812,7 @@ void vnodeQueryFreeQInfoEx(SQInfo *pQInfo) { tfree(pSupporter->pMeterSidExtInfo); if (pSupporter->pMetersHashTable != NULL) { - taosCleanUpHashTable(pSupporter->pMetersHashTable); + taosHashCleanup(pSupporter->pMetersHashTable); pSupporter->pMetersHashTable = NULL; } @@ -5594,7 +5594,7 @@ void UNUSED_FUNC displayInterResult(SData **pdata, SQuery *pQuery, int32_t numOf } } -// static tFilePage *getMeterDataPage(SQueryDiskbasedResultBuf *pResultBuf, SMeterQueryInfo *pMeterQueryInfo, +// static tFilePage *getMeterDataPage(SDiskbasedResultBuf *pResultBuf, SMeterQueryInfo *pMeterQueryInfo, // int32_t index) { // SIDList pList = 
getDataBufPagesIdList(pResultBuf, pMeterQueryInfo->sid); // return getResultBufferPageById(pResultBuf, pList.pData[index]); @@ -5700,7 +5700,7 @@ void copyResToQueryResultBuf(STableQuerySupportObj *pSupporter, SQuery *pQuery) } SQueryRuntimeEnv * pRuntimeEnv = &pSupporter->runtimeEnv; - SQueryDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; int32_t id = getGroupResultId(pSupporter->subgroupIdx - 1); SIDList list = getDataBufPagesIdList(pResultBuf, pSupporter->offset + id); @@ -5883,7 +5883,7 @@ int32_t doMergeMetersResultsToGroupRes(STableQuerySupportObj *pSupporter, SQuery int32_t flushFromResultBuf(STableQuerySupportObj *pSupporter, const SQuery *pQuery, const SQueryRuntimeEnv *pRuntimeEnv) { - SQueryDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; + SDiskbasedResultBuf *pResultBuf = pRuntimeEnv->pResultBuf; int32_t capacity = (DEFAULT_INTERN_BUF_SIZE - sizeof(tFilePage)) / pQuery->rowSize; // the base value for group result, since the maximum number of table for each vnode will not exceed 100,000. 
diff --git a/src/vnode/detail/src/vnodeQueryProcess.c b/src/vnode/detail/src/vnodeQueryProcess.c index ae51365918b142e392dcffa27a6b071543f3d02e..cedb76b4accda46e934694cdbb6de416dcc8f75b 100644 --- a/src/vnode/detail/src/vnodeQueryProcess.c +++ b/src/vnode/detail/src/vnodeQueryProcess.c @@ -16,8 +16,8 @@ #define _DEFAULT_SOURCE #include "os.h" +#include "qextbuffer.h" #include "taosmsg.h" -#include "textbuffer.h" #include "tscJoinProcess.h" #include "ttime.h" #include "vnode.h" diff --git a/src/vnode/detail/src/vnodeRead.c b/src/vnode/detail/src/vnodeRead.c index fb7c85e61c61272159202a2bda4ef7f30fdd93d0..86f508dd9180d5b0d512c3ce509e7cffb7254ca2 100644 --- a/src/vnode/detail/src/vnodeRead.c +++ b/src/vnode/detail/src/vnodeRead.c @@ -16,17 +16,17 @@ #define _DEFAULT_SOURCE #include "os.h" +#include "hash.h" +#include "hashfunc.h" #include "ihash.h" +#include "qast.h" +#include "qextbuffer.h" #include "taosmsg.h" -#include "tast.h" -#include "textbuffer.h" #include "tscJoinProcess.h" #include "tscompression.h" #include "vnode.h" #include "vnodeRead.h" #include "vnodeUtil.h" -#include "hash.h" -#include "hashutil.h" int (*pQueryFunc[])(SMeterObj *, SQuery *) = {vnodeQueryFromCache, vnodeQueryFromFile}; @@ -651,8 +651,8 @@ void *vnodeQueryOnSingleTable(SMeterObj **pMetersObj, SSqlGroupbyExpr *pGroupbyE STableQuerySupportObj *pSupporter = (STableQuerySupportObj *)calloc(1, sizeof(STableQuerySupportObj)); pSupporter->numOfMeters = 1; - pSupporter->pMetersHashTable = taosInitHashTable(pSupporter->numOfMeters, taosIntHash_32, false); - taosAddToHashTable(pSupporter->pMetersHashTable, (const char*) &pMetersObj[0]->sid, sizeof(pMeterObj[0].sid), + pSupporter->pMetersHashTable = taosHashInit(pSupporter->numOfMeters, taosIntHash_32, false); + taosHashPut(pSupporter->pMetersHashTable, (const char*) &pMetersObj[0]->sid, sizeof(pMeterObj[0].sid), (char *)&pMetersObj[0], POINTER_BYTES); pSupporter->pSidSet = NULL; @@ -742,9 +742,9 @@ void *vnodeQueryOnMultiMeters(SMeterObj 
**pMetersObj, SSqlGroupbyExpr *pGroupbyE STableQuerySupportObj *pSupporter = (STableQuerySupportObj *)calloc(1, sizeof(STableQuerySupportObj)); pSupporter->numOfMeters = pQueryMsg->numOfSids; - pSupporter->pMetersHashTable = taosInitHashTable(pSupporter->numOfMeters, taosIntHash_32, false); + pSupporter->pMetersHashTable = taosHashInit(pSupporter->numOfMeters, taosIntHash_32, false); for (int32_t i = 0; i < pSupporter->numOfMeters; ++i) { - taosAddToHashTable(pSupporter->pMetersHashTable, (const char*) &pMetersObj[i]->sid, sizeof(pMetersObj[i]->sid), (char *)&pMetersObj[i], + taosHashPut(pSupporter->pMetersHashTable, (const char*) &pMetersObj[i]->sid, sizeof(pMetersObj[i]->sid), (char *)&pMetersObj[i], POINTER_BYTES); } diff --git a/src/vnode/detail/src/vnodeSupertableQuery.c b/src/vnode/detail/src/vnodeSupertableQuery.c index 038577bd8d02bc490d283b286b1f4f3d6fb2749b..36cb7ad74125cc677b48951e3dea7a203da57dc5 100644 --- a/src/vnode/detail/src/vnodeSupertableQuery.c +++ b/src/vnode/detail/src/vnodeSupertableQuery.c @@ -14,13 +14,12 @@ */ #define _DEFAULT_SOURCE -#include "os.h" #include "mnode.h" -#include "textbuffer.h" +#include "os.h" +#include "qast.h" +#include "qextbuffer.h" #include "tschemautil.h" #include "tsqlfunction.h" -#include "tast.h" -//#include "vnodeTagMgmt.h" typedef struct SSyntaxTreeFilterSupporter { SSchema* pTagSchema; diff --git a/src/vnode/detail/src/vnodeTagMgmt.c b/src/vnode/detail/src/vnodeTagMgmt.c index d3e22ec00adfbf418264888ea01a4940723df2a6..054a18900c050d700663903d1314c52bc21eb5a7 100644 --- a/src/vnode/detail/src/vnodeTagMgmt.c +++ b/src/vnode/detail/src/vnodeTagMgmt.c @@ -16,12 +16,12 @@ #define _DEFAULT_SOURCE #include "os.h" +#include "qast.h" +#include "qextbuffer.h" #include "taosdef.h" +#include "taosmsg.h" #include "tlog.h" #include "tutil.h" -#include "taosmsg.h" -#include "textbuffer.h" -#include "tast.h" #include "vnodeTagMgmt.h" #define GET_TAG_VAL_POINTER(s, col, sc, t) ((t *)(&((s)->tags[getColumnModelOffset(sc, 
col)]))) diff --git a/src/vnode/detail/src/vnodeUtil.c b/src/vnode/detail/src/vnodeUtil.c index feef9ed47324c8c32b64f7c5fb156e5a7c52bf21..43c24bae6af38676a7c9a4a3125f717dfc6d050c 100644 --- a/src/vnode/detail/src/vnodeUtil.c +++ b/src/vnode/detail/src/vnodeUtil.c @@ -16,13 +16,13 @@ #define _DEFAULT_SOURCE #include "os.h" -#include "tast.h" +#include "qast.h" #include "tscUtil.h" #include "tschemautil.h" #include "vnode.h" #include "vnodeDataFilterFunc.h" -#include "vnodeUtil.h" #include "vnodeStatus.h" +#include "vnodeUtil.h" int vnodeCheckFileIntegrity(FILE* fp) { /* diff --git a/src/vnode/tsdb/CMakeLists.txt b/src/vnode/tsdb/CMakeLists.txt index 8a7c7a1a5197e3e47ed7e36cdb2ebcdcef2d6b49..b2154969d6209243511768f43686e2b47d787936 100644 --- a/src/vnode/tsdb/CMakeLists.txt +++ b/src/vnode/tsdb/CMakeLists.txt @@ -15,5 +15,5 @@ IF ((TD_LINUX_64) OR (TD_LINUX_32 AND TD_ARM)) TARGET_LINK_LIBRARIES(tsdb common tutil) # Someone has no gtest directory, so comment it - ADD_SUBDIRECTORY(tests) + # ADD_SUBDIRECTORY(tests) ENDIF () diff --git a/src/vnode/tsdb/inc/tsdb.h b/src/vnode/tsdb/inc/tsdb.h index d5493fdee0e44e20aa27913bdb4e34baec43e518..4964ac673f1b25d351d3eb5f0e8e146d510776db 100644 --- a/src/vnode/tsdb/inc/tsdb.h +++ b/src/vnode/tsdb/inc/tsdb.h @@ -12,16 +12,18 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . 
*/ -#if !defined(_TD_TSDB_H_) +#ifndef _TD_TSDB_H_ #define _TD_TSDB_H_ #include #include #include +#include "dataformat.h" #include "taosdef.h" #include "taosmsg.h" -#include "dataformat.h" +#include "tarray.h" +#include "name.h" #ifdef __cplusplus extern "C" { @@ -30,42 +32,98 @@ extern "C" { #define TSDB_VERSION_MAJOR 1 #define TSDB_VERSION_MINOR 0 +#define TSDB_INVALID_SUPER_TABLE_ID -1 + +// --------- TSDB REPOSITORY CONFIGURATION DEFINITION +typedef struct { + int8_t precision; + int32_t tsdbId; + int32_t maxTables; // maximum number of tables this repository can have + int32_t daysPerFile; // day per file sharding policy + int32_t minRowsPerFileBlock; // minimum rows per file block + int32_t maxRowsPerFileBlock; // maximum rows per file block + int32_t keep; // day of data to keep + int64_t maxCacheSize; // maximum cache size this TSDB can use +} STsdbCfg; + +void tsdbSetDefaultCfg(STsdbCfg *pCfg); +STsdbCfg *tsdbCreateDefaultCfg(); +void tsdbFreeCfg(STsdbCfg *pCfg); + +// --------- TSDB REPOSITORY DEFINITION typedef void tsdb_repo_t; // use void to hide implementation details from outside +tsdb_repo_t * tsdbCreateRepo(char *rootDir, STsdbCfg *pCfg, void *limiter); +int32_t tsdbDropRepo(tsdb_repo_t *repo); +tsdb_repo_t * tsdbOpenRepo(char *tsdbDir); +int32_t tsdbCloseRepo(tsdb_repo_t *repo); +int32_t tsdbConfigRepo(tsdb_repo_t *repo, STsdbCfg *pCfg); +int32_t tsdbTriggerCommit(tsdb_repo_t *repo); + +// --------- TSDB TABLE DEFINITION typedef struct { int64_t uid; // the unique table ID int32_t tid; // the table ID in the repository. 
} STableId; -// Submit message for this TSDB +// --------- TSDB TABLE configuration typedef struct { - int32_t numOfTables; - int32_t compressed; - char data[]; -} SSubmitMsg; + TSDB_TABLE_TYPE type; + STableId tableId; + int32_t sversion; + int64_t superUid; + STSchema * schema; + STSchema * tagSchema; + SDataRow tagValues; +} STableCfg; + +int tsdbInitTableCfg(STableCfg *config, TSDB_TABLE_TYPE type, int64_t uid, int32_t tid); +int tsdbTableSetSuperUid(STableCfg *config, int64_t uid); +int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup); +int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup); +int tsdbTableSetTagValue(STableCfg *config, SDataRow row, bool dup); +void tsdbClearTableCfg(STableCfg *config); + +int tsdbCreateTable(tsdb_repo_t *repo, STableCfg *pCfg); +int tsdbDropTable(tsdb_repo_t *pRepo, STableId tableId); +int tsdbAlterTable(tsdb_repo_t *repo, STableCfg *pCfg); // Submit message for one table typedef struct { STableId tableId; int32_t padding; // TODO just for padding here int32_t sversion; // data schema version - int32_t len; // message length + int32_t len; // data part length, not including the SSubmitBlk head char data[]; -} SSubmitBlock; +} SSubmitBlk; -enum { TSDB_PRECISION_MILLI, TSDB_PRECISION_MICRO, TSDB_PRECISION_NANO }; +typedef struct { + int32_t totalLen; + int32_t len; + SDataRow row; +} SSubmitBlkIter; -// the TSDB repository configuration +int tsdbInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter); +SDataRow tsdbGetSubmitBlkNext(SSubmitBlkIter *pIter); + +// Submit message for this TSDB typedef struct { - int8_t precision; - int32_t tsdbId; - int32_t maxTables; // maximum number of tables this repository can have - int32_t daysPerFile; // day per file sharding policy - int32_t minRowsPerFileBlock; // minimum rows per file block - int32_t maxRowsPerFileBlock; // maximum rows per file block - int32_t keep; // day of data to keep - int64_t maxCacheSize; // maximum cache size this TSDB 
can use -} STsdbCfg; + int32_t length; + int32_t compressed; + SSubmitBlk blocks[]; +} SSubmitMsg; + +#define TSDB_SUBMIT_MSG_HEAD_SIZE sizeof(SSubmitMsg) + +// SSubmitMsg Iterator +typedef struct { + int32_t totalLen; + int32_t len; + SSubmitBlk *pBlock; +} SSubmitMsgIter; + +int tsdbInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter); +SSubmitBlk *tsdbGetSubmitMsgNext(SSubmitMsgIter *pIter); // the TSDB repository info typedef struct STsdbRepoInfo { @@ -75,22 +133,7 @@ typedef struct STsdbRepoInfo { int64_t tsdbTotalDiskSize; // the total disk size taken by this TSDB repository // TODO: Other informations to add } STsdbRepoInfo; - -// the meter configuration -typedef struct { - STableId tableId; - - int64_t stableUid; - int64_t createdTime; - - int32_t numOfCols; // number of columns. For table form super table, not includes the tag schema - STSchema *schema; // If numOfCols == schema_->numOfCols, it is a normal table, stableName = NULL - // If numOfCols < schema->numOfCols, it is a table created from super table - // assert(numOfCols <= schema->numOfCols); - - SDataRow tagValues; // NULL if it is normal table - // otherwise, it contains the tag values. -} STableCfg; +STsdbRepoInfo *tsdbGetStatus(tsdb_repo_t *pRepo); // the meter information report structure typedef struct { @@ -99,70 +142,7 @@ typedef struct { int64_t tableTotalDataSize; // In bytes int64_t tableTotalDiskSize; // In bytes } STableInfo; - -/** - * Create a configuration for TSDB default - * @return a pointer to a configuration. 
the configuration must call tsdbFreeCfg to free memory after usage - */ -STsdbCfg *tsdbCreateDefaultCfg(); - -/** - * Free - */ -void tsdbFreeCfg(STsdbCfg *pCfg); - -/** - * Create a new TSDB repository - * @param rootDir the TSDB repository root directory - * @param pCfg the TSDB repository configuration, upper layer to free the pointer - * - * @return a TSDB repository handle on success, NULL for failure and the error number is set - */ -tsdb_repo_t *tsdbCreateRepo(char *rootDir, STsdbCfg *pCfg, void *limiter); - -/** - * Close and free all resources taken by the repository - * @param repo the TSDB repository handle. The interface will free the handle too, so upper - * layer do NOT need to free the repo handle again. - * - * @return 0 for success, -1 for failure and the error number is set - */ -int32_t tsdbDropRepo(tsdb_repo_t *repo); - -/** - * Open an existing TSDB storage repository - * @param tsdbDir the existing TSDB root directory - * - * @return a TSDB repository handle on success, NULL for failure and the error number is set - */ -tsdb_repo_t *tsdbOpenRepo(char *tsdbDir); - -/** - * Close a TSDB repository. Only free memory resources, and keep the files. - * @param repo the opened TSDB repository handle. The interface will free the handle too, so upper - * layer do NOT need to free the repo handle again. 
- * - * @return 0 for success, -1 for failure and the error number is set - */ -int32_t tsdbCloseRepo(tsdb_repo_t *repo); - -/** - * Change the configuration of a repository - * @param pCfg the repository configuration, the upper layer should free the pointer - * - * @return 0 for success, -1 for failure and the error number is set - */ -int32_t tsdbConfigRepo(tsdb_repo_t *repo, STsdbCfg *pCfg); - -/** - * Get the TSDB repository information, including some statistics - * @param pRepo the TSDB repository handle - * @param error the error number to set when failure occurs - * - * @return a info struct handle on success, NULL for failure and the error number is set. The upper - * layers should free the info handle themselves or memory leak will occur - */ -STsdbRepoInfo *tsdbGetStatus(tsdb_repo_t *pRepo); +STableInfo * tsdbGetTableInfo(tsdb_repo_t *pRepo, STableId tid); // -- For table manipulation @@ -173,8 +153,6 @@ STsdbRepoInfo *tsdbGetStatus(tsdb_repo_t *pRepo); * * @return 0 for success, -1 for failure and the error number is set */ -int32_t tsdbCreateTable(tsdb_repo_t *repo, STableCfg *pCfg); -int32_t tsdbAlterTable(tsdb_repo_t *repo, STableCfg *pCfg); /** * Drop a table in a repository and free all the resources it takes @@ -184,7 +162,6 @@ int32_t tsdbAlterTable(tsdb_repo_t *repo, STableCfg *pCfg); * * @return 0 for success, -1 for failure and the error number is set */ -int32_t tsdbDropTable(tsdb_repo_t *pRepo, STableId tableId); /** * Get the information of a table in the repository @@ -194,7 +171,6 @@ int32_t tsdbDropTable(tsdb_repo_t *pRepo, STableId tableId); * * @return a table information handle for success, NULL for failure and the error number is set */ -STableInfo *tsdbGetTableInfo(tsdb_repo_t *pRepo, STableId tid); // -- FOR INSERT DATA /** @@ -208,23 +184,17 @@ int32_t tsdbInsertData(tsdb_repo_t *pRepo, SSubmitMsg *pMsg); // -- FOR QUERY TIME SERIES DATA -typedef void tsdb_query_handle_t; // Use void to hide implementation details - -// time 
window -typedef struct STimeWindow { - int64_t skey; - int64_t ekey; -} STimeWindow; +typedef void* tsdb_query_handle_t; // Use void to hide implementation details // typedef struct { // } SColumnFilterInfo; // query condition to build vnode iterator -typedef struct STSDBQueryCond { +typedef struct STsdbQueryCond { STimeWindow twindow; int32_t order; // desc/asc order to iterate the data block - SColumnFilterInfo colFilterInfo; -} STSDBQueryCond; + SColumnInfoEx colList; +} STsdbQueryCond; typedef struct SBlockInfo { STimeWindow window; @@ -236,15 +206,18 @@ typedef struct SBlockInfo { } SBlockInfo; // TODO: move this data struct out of the module -typedef struct SData { - int32_t num; - char * data; -} SData; +//typedef struct SData { +// int32_t num; +// char * data; +//} SData; -typedef struct SDataBlock { - int32_t numOfCols; - SData **pData; -} SDataBlock; +typedef struct SDataBlockInfo { + STimeWindow window; + int32_t size; + int32_t numOfCols; + int64_t uid; + int32_t sid; +} SDataBlockInfo; typedef struct STableIDList { STableId *tableIds; @@ -254,83 +227,107 @@ typedef struct STableIDList { typedef struct { } SFields; +#define TSDB_TS_GREATER_EQUAL 1 +#define TSDB_TS_LESS_EQUAL 2 + +typedef struct SQueryRowCond { + int32_t rel; + TSKEY ts; +} SQueryRowCond; + +typedef void *tsdbpos_t; + /** * Get the data block iterator, starting from position according to the query condition - * @param pRepo the TSDB repository to query on * @param pCond query condition, only includes the filter on primary time stamp * @param pTableList table sid list * @return */ -tsdb_query_handle_t *tsdbQueryFromTableID(tsdb_repo_t *pRepo, STSDBQueryCond *pCond, const STableIDList *pTableList); +tsdb_query_handle_t *tsdbQueryByTableId(STsdbQueryCond *pCond, SArray *idList, SArray *pColumnInfo); /** - * Get iterator for super tables, of which tags values satisfy the tag filter info - * - * NOTE: the tagFilterStr is an bin-expression for tag filter, such as ((tag_col = 5) and (tag_col2 
> 7)) - * The filter string is sent from client directly. - * The build of the tags filter expression from string is done in the iterator generating function. - * - * @param pRepo the repository to query on - * @param pCond query condition - * @param pTagFilterStr tag filter info + * move to next block + * @param pQueryHandle * @return */ -tsdb_query_handle_t *tsdbQueryFromTagConds(tsdb_repo_t *pRepo, STSDBQueryCond *pCond, int16_t stableId, - const char *pTagFilterStr); +bool tsdbNextDataBlock(tsdb_query_handle_t *pQueryHandle); /** - * Reset to the start(end) position of current query, from which the iterator starts. + * Get current data block information * * @param pQueryHandle - * @param position set the iterator traverses position. (TSDB_POS_START|TSDB_POS_END) * @return */ -int32_t tsdbResetQuery(tsdb_query_handle_t *pQueryHandle, int16_t position); +SDataBlockInfo tsdbRetrieveDataBlockInfo(tsdb_query_handle_t *pQueryHandle); /** - * move to next block - * @param pQueryHandle - * @param pCond + * + * Get the pre-calculated information w.r.t. current data block. + * + * In case of data block in cache, the pBlockStatis will always be NULL. + * If a block is not completed loaded from disk, the pBlockStatis will be NULL. + + * @pBlockStatis the pre-calculated value for current data blocks. 
if the block is a cache block, always return 0 * @return */ -bool tsdbIterNext(tsdb_query_handle_t *pQueryHandle); +int32_t tsdbRetrieveDataBlockStatisInfo(tsdb_query_handle_t *pQueryHandle, SDataStatis **pBlockStatis); /** - * 当前数据块的信息,调用next函数后,只会获得block的信息,包括:行数、列数、skey/ekey信息。注意该信息并不是现在的SCompBlockInfo信息。 - * 因为SCompBlockInfo是完整的数据块信息,但是迭代器返回并不是。 - * 查询处理引擎会自己决定需要blockInfo, 还是预计算数据,亦或是完整的数据。 - * Get current data block information + * The query condition with primary timestamp is passed to iterator during its constructor function, + * the returned data block must be satisfied with the time window condition in any cases, + * which means the SData data block is not actually the completed disk data blocks. * * @param pQueryHandle * @return */ -SBlockInfo tsdbRetrieveDataBlockInfo(tsdb_query_handle_t *pQueryHandle); +SArray *tsdbRetrieveDataBlock(tsdb_query_handle_t *pQueryHandle, SArray *pIdList); /** - * 获取当前数据块的预计算信息,如果块不完整,无预计算信息,如果是cache块,无预计算信息。 + * todo remove the parameter of position, and order type * - * Get the pre-calculated information w.r.t. current data block. + * Reset to the start(end) position of current query, from which the iterator starts. * - * In case of data block in cache, the pBlockStatis will always be NULL. - * If a block is not completed loaded from disk, the pBlockStatis will be NULL. + * @param pQueryHandle + * @param position set the iterator traverses position + * @param order ascending order or descending order + * @return + */ +int32_t tsdbResetQuery(tsdb_query_handle_t *pQueryHandle, STimeWindow* window, tsdbpos_t position, int16_t order); - * @pBlockStatis the pre-calculated value for current data blocks. 
if the block is a cache block, always return 0 +/** + * return the access position of current query handle + * @param pQueryHandle * @return */ -int32_t tsdbRetrieveDataBlockStatisInfo(tsdb_query_handle_t *pQueryHandle, SFields *pBlockStatis); +int32_t tsdbDataBlockSeek(tsdb_query_handle_t *pQueryHandle, tsdbpos_t pos); /** - * 返回加载到缓存中的数据,可能是磁盘数据也可能是内存数据,对客户透明。即使是磁盘数据,返回的结果也是磁盘块中,满足查询时间范围要求的数据行,并不是一个完整的磁盘数 - * 据块。 + * todo remove this function later + * @param pQueryHandle + * @return + */ +tsdbpos_t tsdbDataBlockTell(tsdb_query_handle_t *pQueryHandle); + +/** + * todo remove this function later + * @param pQueryHandle + * @param pIdList + * @return + */ +SArray *tsdbRetrieveDataRow(tsdb_query_handle_t *pQueryHandle, SArray *pIdList, SQueryRowCond *pCond); + +/** + * Get iterator for super tables, of which tags values satisfy the tag filter info * - * The query condition with primary timestamp is passed to iterator during its constructor function, - * the returned data block must be satisfied with the time window condition in any cases, - * which means the SData data block is not actually the completed disk data blocks. + * NOTE: the tagFilterStr is an bin-expression for tag filter, such as ((tag_col = 5) and (tag_col2 > 7)) + * The filter string is sent from client directly. + * The build of the tags filter expression from string is done in the iterator generating function. * - * @param pQueryHandle + * @param pCond query condition + * @param pTagFilterStr tag filter info * @return */ -SDataBlock *tsdbRetrieveDataBlock(tsdb_query_handle_t *pQueryHandle); +tsdb_query_handle_t *tsdbQueryFromTagConds(STsdbQueryCond *pCond, int16_t stableId, const char *pTagFilterStr); /** * Get the qualified tables for (super) table query. 
diff --git a/src/vnode/tsdb/inc/tsdbCache.h b/src/vnode/tsdb/inc/tsdbCache.h index 8a78a6b19e4ff48945a90273b371717df8d285d5..3bffa1c6a9ea75688e2e44b0f356a42570856d52 100644 --- a/src/vnode/tsdb/inc/tsdbCache.h +++ b/src/vnode/tsdb/inc/tsdbCache.h @@ -17,45 +17,39 @@ #include -// #include "cache.h" +#include "tlist.h" #ifdef __cplusplus extern "C" { #endif -#define TSDB_DEFAULT_CACHE_BLOCK_SIZE 16*1024*1024 /* 16M */ +#define TSDB_DEFAULT_CACHE_BLOCK_SIZE 16 * 1024 * 1024 /* 16M */ typedef struct { - int64_t skey; // start key - int64_t ekey; // end key - int32_t numOfRows; // numOfRows -} STableCacheInfo; + int blockId; + int offset; + int remain; + int padding; + char data[]; +} STsdbCacheBlock; -typedef struct _tsdb_cache_block { - char * pData; - STableCacheInfo * pTableInfo; - struct _tsdb_cache_block *prev; - struct _tsdb_cache_block *next; -} STSDBCacheBlock; +typedef struct { + int64_t index; + SList * memPool; +} STsdbCachePool; -// Use a doublely linked list to implement this -typedef struct STSDBCache { - // Number of blocks the cache is allocated - int32_t numOfBlocks; - STSDBCacheBlock *cacheList; - void * current; +typedef struct { + int maxBytes; + int cacheBlockSize; + STsdbCachePool pool; + STsdbCacheBlock *curBlock; + SList * mem; + SList * imem; } STsdbCache; -// ---- Operation on STSDBCacheBlock -#define TSDB_CACHE_BLOCK_DATA(pBlock) ((pBlock)->pData) -#define TSDB_CACHE_AVAIL_SPACE(pBlock) ((char *)((pBlock)->pTableInfo) - ((pBlock)->pData)) -#define TSDB_TABLE_INFO_OF_CACHE(pBlock, tableId) ((pBlock)->pTableInfo)[tableId] -#define TSDB_NEXT_CACHE_BLOCK(pBlock) ((pBlock)->next) -#define TSDB_PREV_CACHE_BLOCK(pBlock) ((pBlock)->prev) - -STsdbCache *tsdbCreateCache(int32_t numOfBlocks); -int32_t tsdbFreeCache(STsdbCache *pCache); -void * tsdbAllocFromCache(STsdbCache *pCache, int64_t bytes); +STsdbCache *tsdbInitCache(int maxBytes, int cacheBlockSize); +void tsdbFreeCache(STsdbCache *pCache); +void * tsdbAllocFromCache(STsdbCache *pCache, int 
bytes); #ifdef __cplusplus } diff --git a/src/vnode/tsdb/inc/tsdbFile.h b/src/vnode/tsdb/inc/tsdbFile.h index dbcec496511576fabb5da4b47cf8b607f4733263..89159a06e71af6c95c546c0b149c657026ff5c2e 100644 --- a/src/vnode/tsdb/inc/tsdbFile.h +++ b/src/vnode/tsdb/inc/tsdbFile.h @@ -16,43 +16,56 @@ #define _TD_TSDB_FILE_H_ #include -// #include "tstring.h" + +#include "taosdef.h" #ifdef __cplusplus extern "C" { #endif -typedef int32_t file_id_t; - typedef enum { - TSDB_FILE_TYPE_HEAD, // .head file type - TSDB_FILE_TYPE_DATA, // .data file type - TSDB_FILE_TYPE_LAST, // .last file type - TSDB_FILE_TYPE_META // .meta file type + TSDB_FILE_TYPE_HEAD = 0, // .head file type + TSDB_FILE_TYPE_DATA, // .data file type + TSDB_FILE_TYPE_LAST, // .last file type + TSDB_FILE_TYPE_MAX } TSDB_FILE_TYPE; extern const char *tsdbFileSuffix[]; typedef struct { - int64_t fileSize; + int64_t size; + int64_t tombSize; } SFileInfo; typedef struct { - char * fname; - SFileInfo fInfo; -} SFILE; + int8_t type; + char fname[128]; + int64_t size; // total size of the file + int64_t tombSize; // unused file size +} SFile; -// typedef struct { -// int64_t offset; -// int64_t skey; -// int64_t ekey; -// int16_t numOfBlocks; -// } SDataBlock; +typedef struct { + int32_t fileId; + SFile files[TSDB_FILE_TYPE_MAX]; +} SFileGroup; + +// TSDB file handle +typedef struct { + int32_t daysPerFile; + int32_t keep; + int32_t minRowPerFBlock; + int32_t maxRowsPerFBlock; + int32_t maxTables; + SFileGroup fGroup[]; +} STsdbFileH; -#define IS_VALID_TSDB_FILE_TYPE(type) ((type) >= TSDB_FILE_TYPE_HEAD && (type) <= TSDB_FILE_TYPE_META) +#define IS_VALID_TSDB_FILE_TYPE(type) ((type) >= TSDB_FILE_TYPE_HEAD && (type) < TSDB_FILE_TYPE_MAX) -char *tsdbGetFileName(char *dirName, char *fname, TSDB_FILE_TYPE type); +STsdbFileH *tsdbInitFile(char *dataDir, int32_t daysPerFile, int32_t keep, int32_t minRowsPerFBlock, + int32_t maxRowsPerFBlock, int32_t maxTables); +void tsdbCloseFile(STsdbFileH *pFileH); +int 
tsdbCreateFileGroup(char *dataDir, int fileId, SFileGroup *pFGroup, int maxTables); #ifdef __cplusplus } #endif diff --git a/src/vnode/tsdb/inc/tsdbMeta.h b/src/vnode/tsdb/inc/tsdbMeta.h index efab26e1dbde5c5463a0b3c2e5e02ab4a2894c9a..38f0818dfba826ee2300b6c0f2eee3fe2232423f 100644 --- a/src/vnode/tsdb/inc/tsdbMeta.h +++ b/src/vnode/tsdb/inc/tsdbMeta.h @@ -20,6 +20,7 @@ #include "tsdb.h" #include "dataformat.h" #include "tskiplist.h" +#include "tsdbMetaFile.h" #ifdef __cplusplus extern "C" { @@ -30,62 +31,49 @@ extern "C" { // Initially, there are 4 tables #define TSDB_INIT_NUMBER_OF_SUPER_TABLE 4 -typedef enum { - TSDB_SUPER_TABLE, // super table - TSDB_NTABLE, // table not created from super table - TSDB_STABLE // table created from super table -} TSDB_TABLE_TYPE; - #define IS_CREATE_STABLE(pCfg) ((pCfg)->tagValues != NULL) +// ---------- TSDB TABLE DEFINITION typedef struct STable { - STableId tableId; - TSDB_TABLE_TYPE type; - - int64_t createdTime; - - // super table UID -1 for normal table - int32_t stableUid; - - int32_t numOfCols; - - // Schema for this table - // For TSDB_SUPER_TABLE, it is the schema including tags - // For TSDB_NTABLE, it is only the schema, not including tags - // For TSDB_STABLE, it is NULL - STSchema *pSchema; - - // Tag value for this table - // For TSDB_SUPER_TABLE and TSDB_NTABLE, it is NULL - // For TSDB_STABLE, it is the tag value string - SDataRow pTagVal; - - // Object content; - // For TSDB_SUPER_TABLE, it is the index of tables created from it - // For TSDB_STABLE and TSDB_NTABLE, it is the cache data + int8_t type; + STableId tableId; + int32_t superUid; // Super table UID + int32_t sversion; + STSchema *schema; + STSchema *tagSchema; + SDataRow tagVal; union { - void *pData; - void *pIndex; + void *pData; // For TSDB_NORMAL_TABLE and TSDB_CHILD_TABLE, it is the skiplist for cache data + void *pIndex; // For TSDB_SUPER_TABLE, it is the skiplist index } content; + void * iData; // Skiplist to commit + void * eventHandler; // 
TODO + void * streamHandler; // TODO + struct STable *next; // TODO: remove the next +} STable; - // A handle to deal with event - void *eventHandler; +void * tsdbEncodeTable(STable *pTable, int *contLen); +STable *tsdbDecodeTable(void *cont, int contLen); +void * tsdbFreeEncode(void *cont); - // A handle to deal with stream - void *streamHandler; +// ---------- TSDB META HANDLE DEFINITION +typedef struct { + int32_t maxTables; // Max number of tables - struct STable *next; + int32_t nTables; // Tables created -} STable; + STable **tables; // table array -typedef struct { - int32_t maxTables; - int32_t nTables; - STable **tables; // array of normal tables - STable * stables; // linked list of super tables // TODO use container to implement this - void * tableMap; // hash map of uid ==> STable * + STable *superList; // super table list TODO: change it to list container + + void *map; // table map of (uid ===> table) + + SMetaFile *mfh; // meta file handle } STsdbMeta; +STsdbMeta *tsdbInitMeta(const char *rootDir, int32_t maxTables); +int32_t tsdbFreeMeta(STsdbMeta *pMeta); + // ---- Operation on STable #define TSDB_TABLE_ID(pTable) ((pTable)->tableId) #define TSDB_TABLE_UID(pTable) ((pTable)->uid) @@ -97,21 +85,12 @@ typedef struct { #define TSDB_TABLE_CACHE_DATA(pTable) ((pTable)->content.pData) #define TSDB_SUPER_TABLE_INDEX(pTable) ((pTable)->content.pIndex) -STSchema *tsdbGetTableSchema(STable *pTable); - // ---- Operation on SMetaHandle #define TSDB_NUM_OF_TABLES(pHandle) ((pHandle)->numOfTables) #define TSDB_NUM_OF_SUPER_TABLES(pHandle) ((pHandle)->numOfSuperTables) #define TSDB_TABLE_OF_ID(pHandle, id) ((pHandle)->pTables)[id] #define TSDB_GET_TABLE_OF_NAME(pHandle, name) /* TODO */ -// Create a new meta handle with configuration -STsdbMeta *tsdbCreateMeta(int32_t maxTables); -int32_t tsdbFreeMeta(STsdbMeta *pMeta); - -// Recover the meta handle from the file -STsdbMeta *tsdbOpenMeta(char *tsdbDir); - int32_t tsdbCreateTableImpl(STsdbMeta *pMeta, STableCfg 
*pCfg); int32_t tsdbDropTableImpl(STsdbMeta *pMeta, STableId tableId); STable *tsdbIsValidTableToInsert(STsdbMeta *pMeta, STableId tableId); diff --git a/src/vnode/tsdb/inc/tsdbMetaFile.h b/src/vnode/tsdb/inc/tsdbMetaFile.h new file mode 100644 index 0000000000000000000000000000000000000000..a0cf2a005cd549acd4500bd20f811d3b1afcbd3d --- /dev/null +++ b/src/vnode/tsdb/inc/tsdbMetaFile.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#ifndef _TSDB_META_FILE_ +#define _TSDB_META_FILE_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define TSDB_META_FILE_NAME "META" +#define TSDB_META_HASH_FRACTION 1.1 + +typedef int (*iterFunc)(void *, void *cont, int contLen); +typedef void (*afterFunc)(void *); + +typedef struct { + int fd; // File descriptor + int nDel; // number of deletions + int tombSize; // deleted size + int64_t size; // Total file size + void * map; // Map from uid ==> position + iterFunc iFunc; + afterFunc aFunc; + void * appH; +} SMetaFile; + +SMetaFile *tsdbInitMetaFile(char *rootDir, int32_t maxTables, iterFunc iFunc, afterFunc aFunc, void *appH); +int32_t tsdbInsertMetaRecord(SMetaFile *mfh, int64_t uid, void *cont, int32_t contLen); +int32_t tsdbDeleteMetaRecord(SMetaFile *mfh, int64_t uid); +int32_t tsdbUpdateMetaRecord(SMetaFile *mfh, int64_t uid, void *cont, int32_t contLen); +void tsdbCloseMetaFile(SMetaFile *mfh); + +#ifdef __cplusplus +} +#endif + +#endif // _TSDB_META_FILE_ \ No 
newline at end of file diff --git a/src/vnode/tsdb/src/tsdbCache.c b/src/vnode/tsdb/src/tsdbCache.c index dacb36025370a27267bdc72557408c3f8db93974..6a0741dced475965527ba61213032a00e3e197dd 100644 --- a/src/vnode/tsdb/src/tsdbCache.c +++ b/src/vnode/tsdb/src/tsdbCache.c @@ -16,22 +16,106 @@ #include "tsdbCache.h" -STsdbCache *tsdbCreateCache(int32_t numOfBlocks) { - STsdbCache *pCacheHandle = (STsdbCache *)malloc(sizeof(STsdbCache)); - if (pCacheHandle == NULL) { - // TODO : deal with the error - return NULL; +static int tsdbAllocBlockFromPool(STsdbCache *pCache); +static void tsdbFreeBlockList(SList *list); + +/** + * Create a cache handle and pre-allocate its pool of fixed-size blocks. + * @param maxBytes total cache budget in bytes; determines the number of pooled blocks + * @param cacheBlockSize size of one block in bytes; a value <= 0 selects TSDB_DEFAULT_CACHE_BLOCK_SIZE + * + * @return the cache handle on success, NULL on allocation failure + */ +STsdbCache *tsdbInitCache(int maxBytes, int cacheBlockSize) { + STsdbCache *pCache = (STsdbCache *)calloc(1, sizeof(STsdbCache)); + if (pCache == NULL) return NULL; + + /* 0 must fall back to the default as well, otherwise the division below traps */ + if (cacheBlockSize <= 0) cacheBlockSize = TSDB_DEFAULT_CACHE_BLOCK_SIZE; + + pCache->maxBytes = maxBytes; + pCache->cacheBlockSize = cacheBlockSize; + + int nBlocks = maxBytes / cacheBlockSize + 1; + if (nBlocks <= 1) nBlocks = 2; + + STsdbCachePool *pPool = &(pCache->pool); + pPool->index = 0; + pPool->memPool = tdListNew(sizeof(STsdbCacheBlock *)); + if (pPool->memPool == NULL) goto _err; + + for (int i = 0; i < nBlocks; i++) { + STsdbCacheBlock *pBlock = (STsdbCacheBlock *)malloc(sizeof(STsdbCacheBlock) + cacheBlockSize); + if (pBlock == NULL) { + goto _err; + } + pBlock->offset = 0; + pBlock->remain = cacheBlockSize; + tdListAppend(pPool->memPool, (void *)(&pBlock)); } - return pCacheHandle; + pCache->mem = tdListNew(sizeof(STsdbCacheBlock *)); + if (pCache->mem == NULL) goto _err; + + pCache->imem = tdListNew(sizeof(STsdbCacheBlock *)); + if (pCache->imem == NULL) goto _err; + + return pCache; + +_err: + tsdbFreeCache(pCache); + return NULL; +} + +/** + * Free every block held by the cache (imem, mem and the pool) and the handle itself. + * A NULL handle is a no-op. + */ +void tsdbFreeCache(STsdbCache *pCache) { + if (pCache == NULL) return; + tsdbFreeBlockList(pCache->imem); + tsdbFreeBlockList(pCache->mem); + tsdbFreeBlockList(pCache->pool.memPool); + free(pCache); } -int32_t tsdbFreeCache(STsdbCache *pHandle) { return 0; } +void
*tsdbAllocFromCache(STsdbCache *pCache, int bytes) { + if (pCache == NULL) return NULL; + if (bytes > pCache->cacheBlockSize) return NULL; -void *tsdbAllocFromCache(STsdbCache *pCache, int64_t bytes) { - // TODO: implement here - void *ptr = malloc(bytes); - if (ptr == NULL) return NULL; + if (isListEmpty(pCache->mem)) { + if (tsdbAllocBlockFromPool(pCache) < 0) { + /* pool exhausted: curBlock may never have been set, so fail instead of dereferencing it */ + return NULL; + } + } + + if (pCache->curBlock->remain < bytes) { + if (tsdbAllocBlockFromPool(pCache) < 0) { + /* no fresh block available: carving from the current one would run past its end */ + return NULL; + } + } + + /* carve the next `bytes` out of the current block and hand them back zeroed */ + void *ptr = (void *)(pCache->curBlock->data + pCache->curBlock->offset); + pCache->curBlock->offset += bytes; + pCache->curBlock->remain -= bytes; + memset(ptr, 0, bytes); return ptr; +} + +static void tsdbFreeBlockList(SList *list) { + if (list == NULL) return; + SListNode * node = NULL; + STsdbCacheBlock *pBlock = NULL; + while ((node = tdListPopHead(list)) != NULL) { + tdListNodeGetData(list, node, (void *)(&pBlock)); + free(pBlock); + listNodeFree(node); + } + tdListFree(list); +} + +static int tsdbAllocBlockFromPool(STsdbCache *pCache) { + STsdbCachePool *pPool = &(pCache->pool); + if (listNEles(pPool->memPool) == 0) return -1; + + SListNode *node = tdListPopHead(pPool->memPool); + + STsdbCacheBlock *pBlock = NULL; + tdListNodeGetData(pPool->memPool, node, (void *)(&pBlock)); + pBlock->blockId = pPool->index++; + pBlock->offset = 0; + pBlock->remain = pCache->cacheBlockSize; + + tdListAppendNode(pCache->mem, node); + pCache->curBlock = pBlock; + + return 0; } \ No newline at end of file diff --git a/src/vnode/tsdb/src/tsdbFile.c b/src/vnode/tsdb/src/tsdbFile.c index 6009d160e3b4ae80aed19024baf6dc2639929c57..8a7e40cabd0d7e864c9190009cd4a52deaf436cd 100644 --- a/src/vnode/tsdb/src/tsdbFile.c +++ b/src/vnode/tsdb/src/tsdbFile.c @@ -12,25 +12,224 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see .
*/ +#include +#include +#include #include #include #include +#include +#include +#include +#include "tglobalcfg.h" #include "tsdbFile.h" +#define TSDB_FILE_HEAD_SIZE 512 +#define TSDB_FILE_DELIMITER 0xF00AFA0F + +#define tsdbGetKeyFileId(key, daysPerFile, precision) ((key) / tsMsPerDay[(precision)] / (daysPerFile)) +#define tsdbGetMaxNumOfFiles(keep, daysPerFile) ((keep) / (daysPerFile) + 3) + +typedef struct { + int32_t len; + int32_t padding; // For padding purpose + int64_t offset; +} SCompIdx; + +/** + * if numOfSubBlocks == -1, then the SCompBlock is a sub-block + * if numOfSubBlocks == 1, then the SCompBlock refers to the data block, and offset/len refer to + * the data block offset and length + * if numOfSubBlocks > 1, then the offset/len refer to the offset of the first sub-block in the + * binary + */ +typedef struct { + int64_t last : 1; // If the block in data file or last file + int64_t offset : 63; // Offset of data block or sub-block index depending on numOfSubBlocks + int32_t algorithm : 8; // Compression algorithm + int32_t numOfPoints : 24; // Number of total points + int32_t sversion; // Schema version + int32_t len; // Data block length or nothing + int16_t numOfSubBlocks; // Number of sub-blocks; + int16_t numOfCols; + TSKEY keyFirst; + TSKEY keyLast; +} SCompBlock; + +typedef struct { + int32_t delimiter; // For recovery usage + int32_t checksum; // TODO: decide if checksum logic in this file or make it one API + int64_t uid; + int32_t padding; // For padding purpose + int32_t numOfBlocks; // TODO: make the struct padding + SCompBlock blocks[]; +} SCompInfo; + +// TODO: take pre-calculation into account +typedef struct { + int16_t colId; // Column ID + int16_t len; // Column length + int32_t type : 8; + int32_t offset : 24; +} SCompCol; + +// TODO: Take recover into account +typedef struct { + int32_t delimiter; // For recovery usage + int32_t numOfCols; // For recovery usage + int64_t uid; // For recovery usage + SCompCol cols[]; +} SCompData; 
+ const char *tsdbFileSuffix[] = { ".head", // TSDB_FILE_TYPE_HEAD ".data", // TSDB_FILE_TYPE_DATA - ".last", // TSDB_FILE_TYPE_LAST - ".meta" // TSDB_FILE_TYPE_META + ".last" // TSDB_FILE_TYPE_LAST }; -char *tsdbGetFileName(char *dirName, char *fname, TSDB_FILE_TYPE type) { - if (!IS_VALID_TSDB_FILE_TYPE(type)) return NULL; +static int tsdbWriteFileHead(int fd, SFile *pFile) { + char head[TSDB_FILE_HEAD_SIZE] = "\0"; + + pFile->size += TSDB_FILE_HEAD_SIZE; + + // TODO: write version and File statistic to the head + lseek(fd, 0, SEEK_SET); + if (write(fd, head, TSDB_FILE_HEAD_SIZE) < 0) return -1; + + return 0; +} + +/** + * Write a zero-filled SCompIdx table (one entry per table) right after the file head + * and add its size to pFile->size. Returns 0 on success, -1 on failure. + */ +static int tsdbWriteHeadFileIdx(int fd, int maxTables, SFile *pFile) { + int size = sizeof(SCompIdx) * maxTables; + void *buf = calloc(1, size); + if (buf == NULL) return -1; + + if (lseek(fd, TSDB_FILE_HEAD_SIZE, SEEK_SET) < 0) { + free(buf); + return -1; + } + + if (write(fd, buf, size) < 0) { + free(buf); + return -1; + } + + pFile->size += size; + + /* buf is only a staging buffer for the write above; release it on the success path too */ + free(buf); + + return 0; +} + +static int tsdbGetFileName(char *dataDir, int fileId, int8_t type, char *fname) { + if (dataDir == NULL || fname == NULL || !IS_VALID_TSDB_FILE_TYPE(type)) return -1; + + sprintf(fname, "%s/f%d%s", dataDir, fileId, tsdbFileSuffix[type]); + + return 0; +} + +/** + * Create a file and set the SFile object + */ +static int tsdbCreateFile(char *dataDir, int fileId, int8_t type, int maxTables, SFile *pFile) { + memset((void *)pFile, 0, sizeof(SFile)); + pFile->type = type; + + tsdbGetFileName(dataDir, fileId, type, pFile->fname); + if (access(pFile->fname, F_OK) == 0) { + // File already exists + return -1; + } + + int fd = open(pFile->fname, O_WRONLY | O_CREAT, 0755); + if (fd < 0) return -1; + + if (type == TSDB_FILE_TYPE_HEAD) { + if (tsdbWriteHeadFileIdx(fd, maxTables, pFile) < 0) { + close(fd); + return -1; + } + } + + if (tsdbWriteFileHead(fd, pFile) < 0) { + close(fd); + return -1; + } + + close(fd); + + return 0; +} + +static int tsdbRemoveFile(SFile *pFile) { + if (pFile == NULL)
return -1; + return remove(pFile->fname); +} + +// Create a file group with fileId and return a SFileGroup object +int tsdbCreateFileGroup(char *dataDir, int fileId, SFileGroup *pFGroup, int maxTables) { + if (dataDir == NULL || pFGroup == NULL) return -1; + + memset((void *)pFGroup, 0, sizeof(SFileGroup)); + + for (int type = TSDB_FILE_TYPE_HEAD; type < TSDB_FILE_TYPE_MAX; type++) { + if (tsdbCreateFile(dataDir, fileId, type, maxTables, &(pFGroup->files[type])) < 0) { + // TODO: deal with the error here, remove the created files + return -1; + } + } + + pFGroup->fileId = fileId; + + return 0; +} + +/** + * Initialize the TSDB file handle + * + * Allocates the handle with room for tsdbGetMaxNumOfFiles(keep, daysPerFile) file groups, + * records the configuration and scans dataDir (the per-file recovery is still a TODO). + * + * @return the file handle on success, NULL if allocation or opening dataDir fails + */ +STsdbFileH *tsdbInitFile(char *dataDir, int32_t daysPerFile, int32_t keep, int32_t minRowsPerFBlock, + int32_t maxRowsPerFBlock, int32_t maxTables) { + STsdbFileH *pTsdbFileH = + (STsdbFileH *)calloc(1, sizeof(STsdbFileH) + sizeof(SFileGroup) * tsdbGetMaxNumOfFiles(keep, daysPerFile)); + if (pTsdbFileH == NULL) return NULL; + + pTsdbFileH->daysPerFile = daysPerFile; + pTsdbFileH->keep = keep; + pTsdbFileH->minRowPerFBlock = minRowsPerFBlock; + pTsdbFileH->maxRowsPerFBlock = maxRowsPerFBlock; + pTsdbFileH->maxTables = maxTables; + + // Open the directory to read information of each file + DIR *dir = opendir(dataDir); + if (dir == NULL) { + free(pTsdbFileH); + return NULL; + } + + char fname[256]; + + struct dirent *dp; + while ((dp = readdir(dir)) != NULL) { + if (strncmp(dp->d_name, ".", 1) == 0 || strncmp(dp->d_name, "..", 2) == 0) continue; + if (true /* check if the file is the .head file */) { + int fileId = 0; + int vgId = 0; + sscanf(dp->d_name, "v%df%d.head", &vgId, &fileId); + // TODO + + // Open head file + + // Open data file + + // Open last file + } + } + + /* release the directory stream opened above; without this every call leaks a DIR handle */ + closedir(dir); - char *fileName = (char *)malloc(strlen(dirName) + strlen(fname) + strlen(tsdbFileSuffix[type]) + 5); - if (fileName == NULL) return NULL; + return pTsdbFileH; +} - sprintf(fileName, "%s/%s%s", dirName, fname, tsdbFileSuffix[type]); - return fileName; +static
void tsdbGetKeyRangeOfFileId(int32_t daysPerFile, int8_t precision, int32_t fileId, TSKEY *minKey, + TSKEY *maxKey) { + *minKey = fileId * daysPerFile * tsMsPerDay[precision]; + *maxKey = *minKey + daysPerFile * tsMsPerDay[precision] - 1; } \ No newline at end of file diff --git a/src/vnode/tsdb/src/tsdbMain.c b/src/vnode/tsdb/src/tsdbMain.c index 935bf6281c9a3f646cf21a53a3fd2492965f88aa..ed95eac5bc9fc8c41b78fb2944932d4909415ab2 100644 --- a/src/vnode/tsdb/src/tsdbMain.c +++ b/src/vnode/tsdb/src/tsdbMain.c @@ -42,6 +42,9 @@ #define TSDB_MIN_CACHE_SIZE (4 * 1024 * 1024) // 4M #define TSDB_MAX_CACHE_SIZE (1024 * 1024 * 1024) // 1G +#define TSDB_CFG_FILE_NAME "CONFIG" +#define TSDB_DATA_DIR_NAME "data" + enum { TSDB_REPO_STATE_ACTIVE, TSDB_REPO_STATE_CLOSED, TSDB_REPO_STATE_CONFIGURING }; typedef struct _tsdb_repo { @@ -55,13 +58,16 @@ typedef struct _tsdb_repo { // The cache Handle STsdbCache *tsdbCache; + // The TSDB file handle + STsdbFileH *tsdbFileH; + // Disk tier handle for multi-tier storage void *diskTier; - // File Store - void *tsdbFiles; + pthread_mutex_t mutex; - pthread_mutex_t tsdbMutex; + int commit; + pthread_t commitThread; // A limiter to monitor the resources used by tsdb void *limiter; @@ -74,17 +80,21 @@ static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg); static int32_t tsdbSetRepoEnv(STsdbRepo *pRepo); static int32_t tsdbDestroyRepoEnv(STsdbRepo *pRepo); static int tsdbOpenMetaFile(char *tsdbDir); -static int tsdbRecoverRepo(int fd, STsdbCfg *pCfg); -static int32_t tsdbInsertDataToTable(tsdb_repo_t *repo, SSubmitBlock *pBlock); +static int32_t tsdbInsertDataToTable(tsdb_repo_t *repo, SSubmitBlk *pBlock); +static int32_t tsdbRestoreCfg(STsdbRepo *pRepo, STsdbCfg *pCfg); +static int32_t tsdbGetDataDirName(STsdbRepo *pRepo, char *fname); +static void * tsdbCommitToFile(void *arg); #define TSDB_GET_TABLE_BY_ID(pRepo, sid) (((STSDBRepo *)pRepo)->pTableList)[sid] #define TSDB_GET_TABLE_BY_NAME(pRepo, name) #define TSDB_IS_REPO_ACTIVE(pRepo) 
((pRepo)->state == TSDB_REPO_STATE_ACTIVE) #define TSDB_IS_REPO_CLOSED(pRepo) ((pRepo)->state == TSDB_REPO_STATE_CLOSED) -STsdbCfg *tsdbCreateDefaultCfg() { - STsdbCfg *pCfg = (STsdbCfg *)malloc(sizeof(STsdbCfg)); - if (pCfg == NULL) return NULL; +/** + * Set the default TSDB configuration + */ +void tsdbSetDefaultCfg(STsdbCfg *pCfg) { + if (pCfg == NULL) return; pCfg->precision = -1; pCfg->tsdbId = 0; @@ -94,6 +104,18 @@ STsdbCfg *tsdbCreateDefaultCfg() { pCfg->maxRowsPerFileBlock = -1; pCfg->keep = -1; pCfg->maxCacheSize = -1; +} + +/** + * Create a configuration for TSDB default + * @return a pointer to a configuration. the configuration object + * must call tsdbFreeCfg to free memory after usage + */ +STsdbCfg *tsdbCreateDefaultCfg() { + STsdbCfg *pCfg = (STsdbCfg *)malloc(sizeof(STsdbCfg)); + if (pCfg == NULL) return NULL; + + tsdbSetDefaultCfg(pCfg); return pCfg; } @@ -102,7 +124,15 @@ void tsdbFreeCfg(STsdbCfg *pCfg) { if (pCfg != NULL) free(pCfg); } -tsdb_repo_t *tsdbCreateRepo(char *rootDir, STsdbCfg *pCfg, void *limiter) { +/** + * Create a new TSDB repository + * @param rootDir the TSDB repository root directory + * @param pCfg the TSDB repository configuration, upper layer need to free the pointer + * @param limiter the limitation tracker will implement in the future, make it void now + * + * @return a TSDB repository handle on success, NULL for failure + */ +tsdb_repo_t *tsdbCreateRepo(char *rootDir, STsdbCfg *pCfg, void *limiter /* TODO */) { if (rootDir == NULL) return NULL; if (access(rootDir, F_OK | R_OK | W_OK) == -1) return NULL; @@ -120,26 +150,41 @@ tsdb_repo_t *tsdbCreateRepo(char *rootDir, STsdbCfg *pCfg, void *limiter) { pRepo->config = *pCfg; pRepo->limiter = limiter; - pRepo->tsdbMeta = tsdbCreateMeta(pCfg->maxTables); - if (pRepo->tsdbMeta == NULL) { + // Create the environment files and directories + if (tsdbSetRepoEnv(pRepo) < 0) { free(pRepo->rootDir); free(pRepo); return NULL; } - pRepo->tsdbCache = tsdbCreateCache(5); - if 
(pRepo->tsdbCache == NULL) { + // Initialize meta + STsdbMeta *pMeta = tsdbInitMeta(rootDir, pCfg->maxTables); + if (pMeta == NULL) { free(pRepo->rootDir); - tsdbFreeMeta(pRepo->tsdbMeta); free(pRepo); return NULL; } + pRepo->tsdbMeta = pMeta; - // Create the Meta data file and data directory - if (tsdbSetRepoEnv(pRepo) < 0) { + // Initialize cache + STsdbCache *pCache = tsdbInitCache(pCfg->maxCacheSize, -1); + if (pCache == NULL) { free(pRepo->rootDir); tsdbFreeMeta(pRepo->tsdbMeta); + free(pRepo); + return NULL; + } + pRepo->tsdbCache = pCache; + + // Initialize file handle + char dataDir[128] = "\0"; + tsdbGetDataDirName(pRepo, dataDir); + pRepo->tsdbFileH = + tsdbInitFile(dataDir, pCfg->daysPerFile, pCfg->keep, pCfg->minRowsPerFileBlock, pCfg->maxRowsPerFileBlock, pCfg->maxTables); + if (pRepo->tsdbFileH == NULL) { + free(pRepo->rootDir); tsdbFreeCache(pRepo->tsdbCache); + tsdbFreeMeta(pRepo->tsdbMeta); free(pRepo); return NULL; } @@ -149,6 +194,13 @@ tsdb_repo_t *tsdbCreateRepo(char *rootDir, STsdbCfg *pCfg, void *limiter) { return (tsdb_repo_t *)pRepo; } +/** + * Close and free all resources taken by the repository + * @param repo the TSDB repository handle. The interface will free the handle too, so upper + * layer do NOT need to free the repo handle again. 
+ * + * @return 0 for success, -1 for failure and the error number is set + */ int32_t tsdbDropRepo(tsdb_repo_t *repo) { STsdbRepo *pRepo = (STsdbRepo *)repo; @@ -169,6 +221,12 @@ int32_t tsdbDropRepo(tsdb_repo_t *repo) { return 0; } +/** + * Open an existing TSDB storage repository + * @param tsdbDir the existing TSDB root directory + * + * @return a TSDB repository handle on success, NULL for failure and the error number is set + */ tsdb_repo_t *tsdbOpenRepo(char *tsdbDir) { if (access(tsdbDir, F_OK | W_OK | R_OK) < 0) { return NULL; @@ -179,25 +237,25 @@ tsdb_repo_t *tsdbOpenRepo(char *tsdbDir) { return NULL; } - int fd = tsdbOpenMetaFile(tsdbDir); - if (fd < 0) { - free(pRepo); - return NULL; - } + pRepo->rootDir = strdup(tsdbDir); - if (tsdbRecoverRepo(fd, &(pRepo->config)) < 0) { - close(fd); + tsdbRestoreCfg(pRepo, &(pRepo->config)); + + pRepo->tsdbMeta = tsdbInitMeta(tsdbDir, pRepo->config.maxTables); + if (pRepo->tsdbMeta == NULL) { + free(pRepo->rootDir); free(pRepo); return NULL; } - pRepo->tsdbCache = tsdbCreateCache(5); + pRepo->tsdbCache = tsdbInitCache(pRepo->config.maxCacheSize, -1); if (pRepo->tsdbCache == NULL) { - // TODO: deal with error + tsdbFreeMeta(pRepo->tsdbMeta); + free(pRepo->rootDir); + free(pRepo); return NULL; } - pRepo->rootDir = strdup(tsdbDir); pRepo->state = TSDB_REPO_STATE_ACTIVE; return (tsdb_repo_t *)pRepo; @@ -208,6 +266,13 @@ static int32_t tsdbFlushCache(STsdbRepo *pRepo) { return 0; } +/** + * Close a TSDB repository. Only free memory resources, and keep the files. + * @param repo the opened TSDB repository handle. The interface will free the handle too, so upper + * layer do NOT need to free the repo handle again. 
+ * + * @return 0 for success, -1 for failure and the error number is set + */ int32_t tsdbCloseRepo(tsdb_repo_t *repo) { STsdbRepo *pRepo = (STsdbRepo *)repo; if (pRepo == NULL) return 0; @@ -223,6 +288,12 @@ int32_t tsdbCloseRepo(tsdb_repo_t *repo) { return 0; } +/** + * Change the configuration of a repository + * @param pCfg the repository configuration, the upper layer should free the pointer + * + * @return 0 for success, -1 for failure and the error number is set + */ int32_t tsdbConfigRepo(tsdb_repo_t *repo, STsdbCfg *pCfg) { STsdbRepo *pRepo = (STsdbRepo *)repo; @@ -231,23 +302,56 @@ int32_t tsdbConfigRepo(tsdb_repo_t *repo, STsdbCfg *pCfg) { return 0; } +/** + * Hand the in-memory data of every table over to a commit thread and wait for it. + * @param repo the TSDB repository handle + * + * @return 0 on success or when a commit is already in progress, -1 if the mutex cannot be taken + */ +int32_t tsdbTriggerCommit(tsdb_repo_t *repo) { + STsdbRepo *pRepo = (STsdbRepo *)repo; + + /* pthread_mutex_lock reports failure with a positive error number, never a negative one */ + if (pthread_mutex_lock(&(pRepo->mutex)) != 0) return -1; + if (pRepo->commit) { + /* a commit is already running: drop the lock before leaving, otherwise later callers deadlock */ + pthread_mutex_unlock(&(pRepo->mutex)); + return 0; + } + pRepo->commit = 1; + // Loop to move pData to iData + for (int i = 0; i < pRepo->config.maxTables; i++) { + STable *pTable = pRepo->tsdbMeta->tables[i]; + if (pTable != NULL) { + void *pData = pTable->content.pData; + pTable->content.pData = NULL; + pTable->iData = pData; + } + } + // Loop to move mem to imem + tdListMove(pRepo->tsdbCache->mem, pRepo->tsdbCache->imem); + + pthread_create(&(pRepo->commitThread), NULL, tsdbCommitToFile, (void *)repo); + pthread_mutex_unlock(&(pRepo->mutex)); + + pthread_join(pRepo->commitThread, NULL); + + return 0; +} + +/** + * Get the TSDB repository information, including some statistics + * @param pRepo the TSDB repository handle + * @param error the error number to set when failure occurs + * + * @return a info struct handle on success, NULL for failure and the error number is set.
The upper + * layers should free the info handle themselves or memory leak will occur + */ STsdbRepoInfo *tsdbGetStatus(tsdb_repo_t *pRepo) { // TODO return NULL; } -int32_t tsdbCreateTable(tsdb_repo_t *repo, STableCfg *pCfg) { +int tsdbCreateTable(tsdb_repo_t *repo, STableCfg *pCfg) { STsdbRepo *pRepo = (STsdbRepo *)repo; return tsdbCreateTableImpl(pRepo->tsdbMeta, pCfg); } -int32_t tsdbAlterTable(tsdb_repo_t *pRepo, STableCfg *pCfg) { +int tsdbAlterTable(tsdb_repo_t *pRepo, STableCfg *pCfg) { // TODO return 0; } -int32_t tsdbDropTable(tsdb_repo_t *repo, STableId tableId) { - // TODO +int tsdbDropTable(tsdb_repo_t *repo, STableId tableId) { if (repo == NULL) return -1; STsdbRepo *pRepo = (STsdbRepo *)repo; @@ -261,18 +365,150 @@ STableInfo *tsdbGetTableInfo(tsdb_repo_t *pRepo, STableId tableId) { // TODO: need to return the number of data inserted int32_t tsdbInsertData(tsdb_repo_t *repo, SSubmitMsg *pMsg) { - SSubmitBlock *pBlock = (SSubmitBlock *)pMsg->data; + SSubmitMsgIter msgIter; - for (int i = 0; i < pMsg->numOfTables; i++) { // Loop to deal with the submit message + tsdbInitSubmitMsgIter(pMsg, &msgIter); + SSubmitBlk *pBlock; + while ((pBlock = tsdbGetSubmitMsgNext(&msgIter)) != NULL) { if (tsdbInsertDataToTable(repo, pBlock) < 0) { return -1; } - pBlock = (SSubmitBlock *)(((char *)pBlock) + sizeof(SSubmitBlock) + pBlock->len); } return 0; } +/** + * Initialize a table configuration + */ +int tsdbInitTableCfg(STableCfg *config, TSDB_TABLE_TYPE type, int64_t uid, int32_t tid) { + if (config == NULL) return -1; + if (type != TSDB_NORMAL_TABLE && type != TSDB_CHILD_TABLE) return -1; + + memset((void *)config, 0, sizeof(STableCfg)); + + config->type = type; + config->superUid = TSDB_INVALID_SUPER_TABLE_ID; + config->tableId.uid = uid; + config->tableId.tid = tid; + return 0; +} + +/** + * Set the super table UID of the created table + */ +int tsdbTableSetSuperUid(STableCfg *config, int64_t uid) { + if (config->type != TSDB_CHILD_TABLE) return -1; + if (uid == 
TSDB_INVALID_SUPER_TABLE_ID) return -1; + + config->superUid = uid; + return 0; +} + +/** + * Set the table schema in the configuration + * @param config the configuration to set + * @param pSchema the schema to set + * @param dup use the schema directly or duplicate one for use + * + * @return 0 for success and -1 for failure + */ +int tsdbTableSetSchema(STableCfg *config, STSchema *pSchema, bool dup) { + if (dup) { + config->schema = tdDupSchema(pSchema); + } else { + config->schema = pSchema; + } + return 0; +} + +/** + * Set the table schema in the configuration + * @param config the configuration to set + * @param pSchema the schema to set + * @param dup use the schema directly or duplicate one for use + * + * @return 0 for success and -1 for failure + */ +int tsdbTableSetTagSchema(STableCfg *config, STSchema *pSchema, bool dup) { + if (config->type != TSDB_CHILD_TABLE) return -1; + + if (dup) { + config->tagSchema = tdDupSchema(pSchema); + } else { + config->tagSchema = pSchema; + } + return 0; +} + +int tsdbTableSetTagValue(STableCfg *config, SDataRow row, bool dup) { + if (config->type != TSDB_CHILD_TABLE) return -1; + + if (dup) { + config->tagValues = tdDataRowDup(row); + } else { + config->tagValues = row; + } + + return 0; +} + +void tsdbClearTableCfg(STableCfg *config) { + if (config->schema) tdFreeSchema(config->schema); + if (config->tagSchema) tdFreeSchema(config->tagSchema); + if (config->tagValues) tdFreeDataRow(config->tagValues); +} + +int tsdbInitSubmitBlkIter(SSubmitBlk *pBlock, SSubmitBlkIter *pIter) { + if (pBlock->len <= 0) return -1; + pIter->totalLen = pBlock->len; + pIter->len = 0; + pIter->row = (SDataRow)(pBlock->data); + return 0; +} + +SDataRow tsdbGetSubmitBlkNext(SSubmitBlkIter *pIter) { + SDataRow row = pIter->row; + if (row == NULL) return NULL; + + pIter->len += dataRowLen(row); + if (pIter->len >= pIter->totalLen) { + pIter->row = NULL; + } else { + pIter->row = (char *)row + dataRowLen(row); + } + + return row; +} + +int 
tsdbInitSubmitMsgIter(SSubmitMsg *pMsg, SSubmitMsgIter *pIter) { + if (pMsg == NULL || pIter == NULL) return -1; + + pIter->totalLen = pMsg->length; + pIter->len = TSDB_SUBMIT_MSG_HEAD_SIZE; + if (pMsg->length <= TSDB_SUBMIT_MSG_HEAD_SIZE) { + pIter->pBlock = NULL; + } else { + pIter->pBlock = pMsg->blocks; + } + + return 0; +} + +SSubmitBlk *tsdbGetSubmitMsgNext(SSubmitMsgIter *pIter) { + SSubmitBlk *pBlock = pIter->pBlock; + if (pBlock == NULL) return NULL; + + pIter->len = pIter->len + sizeof(SSubmitBlk) + pBlock->len; + if (pIter->len >= pIter->totalLen) { + pIter->pBlock = NULL; + } else { + pIter->pBlock = (SSubmitBlk *)((char *)pBlock + pBlock->len + sizeof(SSubmitBlk)); + } + + return pBlock; +} + // Check the configuration and set default options static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg) { // Check precision @@ -285,7 +521,7 @@ static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg) { // Check tsdbId if (pCfg->tsdbId < 0) return -1; - // Check MaxTables + // Check maxTables if (pCfg->maxTables == -1) { pCfg->maxTables = TSDB_DEFAULT_TABLES; } else { @@ -333,10 +569,18 @@ static int32_t tsdbCheckAndSetDefaultCfg(STsdbCfg *pCfg) { return 0; } -static int32_t tsdbSetRepoEnv(STsdbRepo *pRepo) { - char *metaFname = tsdbGetFileName(pRepo->rootDir, "tsdb", TSDB_FILE_TYPE_META); +static int32_t tsdbGetCfgFname(STsdbRepo *pRepo, char *fname) { + if (pRepo == NULL) return -1; + sprintf(fname, "%s/%s", pRepo->rootDir, TSDB_CFG_FILE_NAME); + return 0; +} + +static int32_t tsdbSaveConfig(STsdbRepo *pRepo) { + char fname[128] = "\0"; // TODO: get rid of the literal 128 - int fd = open(metaFname, O_WRONLY | O_CREAT); + if (tsdbGetCfgFname(pRepo, fname) < 0) return -1; + + int fd = open(fname, O_WRONLY | O_CREAT, 0755); if (fd < 0) { return -1; } @@ -345,19 +589,45 @@ static int32_t tsdbSetRepoEnv(STsdbRepo *pRepo) { return -1; } - // Create the data file - char *dirName = calloc(1, strlen(pRepo->rootDir) + strlen("tsdb") + 2); - if (dirName == NULL) { + 
close(fd); + return 0; +} + +/** + * Read the STsdbCfg stored in the repository's CONFIG file into pCfg. + * Returns 0 on success, -1 if the file cannot be opened or a full STsdbCfg cannot be read. + */ +static int32_t tsdbRestoreCfg(STsdbRepo *pRepo, STsdbCfg *pCfg) { + char fname[128] = "\0"; + + if (tsdbGetCfgFname(pRepo, fname) < 0) return -1; + + int fd = open(fname, O_RDONLY); + if (fd < 0) { return -1; } - sprintf(dirName, "%s/%s", pRepo->rootDir, "tsdb"); - if (mkdir(dirName, 0755) < 0) { - free(dirName); + /* compare as signed: with '<' against the unsigned sizeof, a -1 error return was converted to a huge value and accepted */ + if (read(fd, (void *)pCfg, sizeof(STsdbCfg)) != (ssize_t)sizeof(STsdbCfg)) { + close(fd); return -1; } - free(dirName); + close(fd); + + return 0; +} + +static int32_t tsdbGetDataDirName(STsdbRepo *pRepo, char *fname) { + if (pRepo == NULL || pRepo->rootDir == NULL) return -1; + sprintf(fname, "%s/%s", pRepo->rootDir, TSDB_DATA_DIR_NAME); + return 0; +} + +static int32_t tsdbSetRepoEnv(STsdbRepo *pRepo) { + if (tsdbSaveConfig(pRepo) < 0) return -1; + + char dirName[128] = "\0"; + if (tsdbGetDataDirName(pRepo, dirName) < 0) return -1; + + if (mkdir(dirName, 0755) < 0) { + return -1; + } return 0; } @@ -386,9 +656,6 @@ static int32_t tsdbDestroyRepoEnv(STsdbRepo *pRepo) { rmdir(dirName); - char *metaFname = tsdbGetFileName(pRepo->rootDir, "tsdb", TSDB_FILE_TYPE_META); - remove(metaFname); - return 0; } @@ -397,12 +664,6 @@ static int tsdbOpenMetaFile(char *tsdbDir) { return 0; } -static int tsdbRecoverRepo(int fd, STsdbCfg *pCfg) { - // TODO: read tsdb configuration from file - // recover tsdb meta - return 0; -} - static int32_t tdInsertRowToTable(STsdbRepo *pRepo, SDataRow row, STable *pTable) { // TODO int32_t level = 0; @@ -417,7 +678,7 @@ static int32_t tdInsertRowToTable(STsdbRepo *pRepo, SDataRow row, STable *pTable } pNode->level = level; - tdDataRowCpy(SL_GET_NODE_DATA(pNode), row); + dataRowCpy(SL_GET_NODE_DATA(pNode), row); // Insert the skiplist node into the data tsdbInsertRowToTableImpl(pNode, pTable); @@ -425,25 +686,40 @@ static int32_t tdInsertRowToTable(STsdbRepo *pRepo, SDataRow row, STable *pTable return 0; } -static int32_t tsdbInsertDataToTable(tsdb_repo_t *repo, SSubmitBlock *pBlock) { +static int32_t
tsdbInsertDataToTable(tsdb_repo_t *repo, SSubmitBlk *pBlock) { STsdbRepo *pRepo = (STsdbRepo *)repo; STable *pTable = tsdbIsValidTableToInsert(pRepo->tsdbMeta, pBlock->tableId); - if (pTable == NULL) { - return -1; - } + if (pTable == NULL) return -1; - SDataRows rows = pBlock->data; - SDataRowsIter rDataIter, *pIter; - pIter = &rDataIter; + SSubmitBlkIter blkIter; SDataRow row; - tdInitSDataRowsIter(rows, pIter); - while ((row = tdDataRowsNext(pIter)) != NULL) { + tsdbInitSubmitBlkIter(pBlock, &blkIter); + while ((row = tsdbGetSubmitBlkNext(&blkIter)) != NULL) { if (tdInsertRowToTable(pRepo, row, pTable) < 0) { - // TODO: deal with the error here + return -1; } } return 0; +} + +static void *tsdbCommitToFile(void *arg) { + // TODO + STsdbRepo *pRepo = (STsdbRepo *)arg; + STsdbMeta *pMeta = pRepo->tsdbMeta; + for (int i = 0; i < pRepo->config.maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable == NULL) continue; + SSkipListIterator *pIter = tSkipListCreateIter(pTable->iData); + while (tSkipListIterNext(pIter)) { + SSkipListNode *node = tSkipListIterGet(pIter); + SDataRow row = SL_GET_NODE_DATA(node); + int k = 0; + + } + } + + return NULL; } \ No newline at end of file diff --git a/src/vnode/tsdb/src/tsdbMeta.c b/src/vnode/tsdb/src/tsdbMeta.c index 6c9cc2404aafb57d13d1e103c5940de600ce7efb..98dcd45bedece4351a67430a213c597663fb35cd 100644 --- a/src/vnode/tsdb/src/tsdbMeta.c +++ b/src/vnode/tsdb/src/tsdbMeta.c @@ -9,31 +9,149 @@ #include "tsdbCache.h" #define TSDB_SUPER_TABLE_SL_LEVEL 5 // TODO: may change here +#define TSDB_META_FILE_NAME "META" static int tsdbFreeTable(STable *pTable); static int32_t tsdbCheckTableCfg(STableCfg *pCfg); -static int tsdbAddTableToMeta(STsdbMeta *pMeta, STable *pTable); +static int tsdbAddTableToMeta(STsdbMeta *pMeta, STable *pTable, bool addIdx); static int tsdbAddTableIntoMap(STsdbMeta *pMeta, STable *pTable); static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable); static int 
tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable); +static int tsdbEstimateTableEncodeSize(STable *pTable); +static char * getTupleKey(const void *data); -STsdbMeta *tsdbCreateMeta(int32_t maxTables) { - STsdbMeta *pMeta = (STsdbMeta *)malloc(sizeof(STsdbMeta)); - if (pMeta == NULL) { - return NULL; +/** + * Encode a TSDB table object as a binary content + * ASSUMPTIONS: VALID PARAMETERS + * + * @param pTable table object to encode + * @param contLen the encoded binary content length + * + * @return binary content for success + * NULL fro failure + */ +void *tsdbEncodeTable(STable *pTable, int *contLen) { + if (pTable == NULL) return NULL; + + *contLen = tsdbEstimateTableEncodeSize(pTable); + if (*contLen < 0) return NULL; + + void *ret = malloc(*contLen); + if (ret == NULL) return NULL; + + void *ptr = ret; + T_APPEND_MEMBER(ptr, pTable, STable, type); + T_APPEND_MEMBER(ptr, &(pTable->tableId), STableId, uid); + T_APPEND_MEMBER(ptr, &(pTable->tableId), STableId, tid); + T_APPEND_MEMBER(ptr, pTable, STable, superUid); + T_APPEND_MEMBER(ptr, pTable, STable, sversion); + + if (pTable->type == TSDB_SUPER_TABLE) { + ptr = tdEncodeSchema(ptr, pTable->schema); + ptr = tdEncodeSchema(ptr, pTable->tagSchema); + } else if (pTable->type == TSDB_CHILD_TABLE) { + dataRowCpy(ptr, pTable->tagVal); + } else { + ptr = tdEncodeSchema(ptr, pTable->schema); + } + + return ret; +} + +/** + * Decode from an encoded binary + * ASSUMPTIONS: valid parameters + * + * @param cont binary object + * @param contLen binary length + * + * @return TSDB table object for success + * NULL for failure + */ +STable *tsdbDecodeTable(void *cont, int contLen) { + STable *pTable = (STable *)calloc(1, sizeof(STable)); + if (pTable == NULL) return NULL; + + void *ptr = cont; + T_READ_MEMBER(ptr, int8_t, pTable->type); + T_READ_MEMBER(ptr, int64_t, pTable->tableId.uid); + T_READ_MEMBER(ptr, int32_t, pTable->tableId.tid); + T_READ_MEMBER(ptr, int32_t, pTable->superUid); + T_READ_MEMBER(ptr, int32_t, 
pTable->sversion); + + if (pTable->type == TSDB_SUPER_TABLE) { + pTable->schema = tdDecodeSchema(&ptr); + pTable->tagSchema = tdDecodeSchema(&ptr); + } else if (pTable->type == TSDB_CHILD_TABLE) { + pTable->tagVal = tdDataRowDup(ptr); + } else { + pTable->schema = tdDecodeSchema(&ptr); + } + + return pTable; +} + +void *tsdbFreeEncode(void *cont) { + if (cont != NULL) free(cont); +} + +int tsdbRestoreTable(void *pHandle, void *cont, int contLen) { + STsdbMeta *pMeta = (STsdbMeta *)pHandle; + + STable *pTable = tsdbDecodeTable(cont, contLen); + if (pTable == NULL) return -1; + + if (pTable->type == TSDB_SUPER_TABLE) { + pTable->content.pIndex = + tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, sizeof(int64_t), 1, 0, getTupleKey); + } else { + pTable->content.pData = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, + TYPE_BYTES[TSDB_DATA_TYPE_TIMESTAMP], 0, 0, getTupleKey); + } + + tsdbAddTableToMeta(pMeta, pTable, false); + + return 0; +} + +void tsdbOrgMeta(void *pHandle) { + STsdbMeta *pMeta = (STsdbMeta *)pHandle; + + for (int i = 0; i < pMeta->maxTables; i++) { + STable *pTable = pMeta->tables[i]; + if (pTable != NULL && pTable->type == TSDB_CHILD_TABLE) { + tsdbAddTableIntoIndex(pMeta, pTable); + } } +} + +/** + * Initialize the meta handle + * ASSUMPTIONS: VALID PARAMETER + */ +STsdbMeta *tsdbInitMeta(const char *rootDir, int32_t maxTables) { + STsdbMeta *pMeta = (STsdbMeta *)malloc(sizeof(STsdbMeta)); + if (pMeta == NULL) return NULL; pMeta->maxTables = maxTables; pMeta->nTables = 0; - pMeta->stables = NULL; + pMeta->superList = NULL; pMeta->tables = (STable **)calloc(maxTables, sizeof(STable *)); if (pMeta->tables == NULL) { free(pMeta); return NULL; } - pMeta->tableMap = taosInitHashTable(maxTables + maxTables / 10, taosGetDefaultHashFunction, false); - if (pMeta->tableMap == NULL) { + pMeta->map = taosHashInit(maxTables * TSDB_META_HASH_FRACTION, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false); + if 
(pMeta->map == NULL) { + free(pMeta->tables); + free(pMeta); + return NULL; + } + + pMeta->mfh = tsdbInitMetaFile(rootDir, maxTables, tsdbRestoreTable, tsdbOrgMeta, pMeta); + if (pMeta->mfh == NULL) { + taosHashCleanup(pMeta->map); free(pMeta->tables); free(pMeta); return NULL; @@ -45,6 +163,8 @@ STsdbMeta *tsdbCreateMeta(int32_t maxTables) { int32_t tsdbFreeMeta(STsdbMeta *pMeta) { if (pMeta == NULL) return 0; + tsdbCloseMetaFile(pMeta->mfh); + for (int i = 0; i < pMeta->maxTables; i++) { if (pMeta->tables[i] != NULL) { tsdbFreeTable(pMeta->tables[i]); @@ -53,14 +173,14 @@ int32_t tsdbFreeMeta(STsdbMeta *pMeta) { free(pMeta->tables); - STable *pTable = pMeta->stables; + STable *pTable = pMeta->superList; while (pTable != NULL) { STable *pTemp = pTable; pTable = pTemp->next; tsdbFreeTable(pTemp); } - taosCleanUpHashTable(pMeta->tableMap); + taosHashCleanup(pMeta->map); free(pMeta); @@ -68,74 +188,78 @@ int32_t tsdbFreeMeta(STsdbMeta *pMeta) { } int32_t tsdbCreateTableImpl(STsdbMeta *pMeta, STableCfg *pCfg) { - if (tsdbCheckTableCfg(pCfg) < 0) { - return -1; - } + if (tsdbCheckTableCfg(pCfg) < 0) return -1; - STable *pSTable = NULL; + STable *super = NULL; int newSuper = 0; - if (IS_CREATE_STABLE(pCfg)) { // to create a TSDB_STABLE, check if super table exists - pSTable = tsdbGetTableByUid(pMeta, pCfg->stableUid); - if (pSTable == NULL) { // super table not exists, try to create it + if (pCfg->type == TSDB_CHILD_TABLE) { + super = tsdbGetTableByUid(pMeta, pCfg->superUid); + if (super == NULL) { // super table not exists, try to create it newSuper = 1; - pSTable = (STable *)calloc(1, sizeof(STable)); - if (pSTable == NULL) return -1; - - pSTable->tableId.uid = pCfg->stableUid; - pSTable->tableId.tid = -1; - pSTable->type = TSDB_SUPER_TABLE; - // pSTable->createdTime = pCfg->createdTime; // The created time is not required - pSTable->stableUid = -1; - pSTable->numOfCols = pCfg->numOfCols; - pSTable->pSchema = tdDupSchema(pCfg->schema); - pSTable->content.pIndex = 
tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, sizeof(int64_t), 1, - 0, NULL); // Allow duplicate key, no lock - if (pSTable->content.pIndex == NULL) { - free(pSTable); + // TODO: use function to implement create table object + super = (STable *)calloc(1, sizeof(STable)); + if (super == NULL) return -1; + + super->type = TSDB_SUPER_TABLE; + super->tableId.uid = pCfg->superUid; + super->tableId.tid = -1; + super->superUid = TSDB_INVALID_SUPER_TABLE_ID; + super->schema = tdDupSchema(pCfg->schema); + super->tagSchema = tdDupSchema(pCfg->tagSchema); + super->tagVal = tdDataRowDup(pCfg->tagValues); + super->content.pIndex = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, sizeof(int64_t), 1, + 0, getTupleKey); // Allow duplicate key, no lock + + if (super->content.pIndex == NULL) { + tdFreeSchema(super->schema); + tdFreeSchema(super->tagSchema); + tdFreeDataRow(super->tagVal); + free(super); return -1; } } else { - if (pSTable->type != TSDB_SUPER_TABLE) return -1; + if (super->type != TSDB_SUPER_TABLE) return -1; } } - STable *pTable = (STable *)malloc(sizeof(STable)); - if (pTable == NULL) { - if (newSuper) tsdbFreeTable(pSTable); + STable *table = (STable *)malloc(sizeof(STable)); + if (table == NULL) { + if (newSuper) tsdbFreeTable(super); return -1; } - pTable->tableId = pCfg->tableId; - pTable->createdTime = pCfg->createdTime; - if (IS_CREATE_STABLE(pCfg)) { // TSDB_STABLE - pTable->type = TSDB_STABLE; - pTable->stableUid = pCfg->stableUid; - pTable->pTagVal = tdDataRowDup(pCfg->tagValues); - } else { // TSDB_NTABLE - pTable->type = TSDB_NTABLE; - pTable->stableUid = -1; - pTable->pSchema = tdDupSchema(pCfg->schema); + table->tableId = pCfg->tableId; + if (IS_CREATE_STABLE(pCfg)) { // TSDB_CHILD_TABLE + table->type = TSDB_CHILD_TABLE; + table->superUid = pCfg->superUid; + table->tagVal = tdDataRowDup(pCfg->tagValues); + } else { // TSDB_NORMAL_TABLE + table->type = TSDB_NORMAL_TABLE; + table->superUid = -1; + table->schema 
= tdDupSchema(pCfg->schema); + } + table->content.pData = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, TSDB_DATA_TYPE_TIMESTAMP, TYPE_BYTES[TSDB_DATA_TYPE_TIMESTAMP], 0, 0, getTupleKey); + + // Register to meta + if (newSuper) tsdbAddTableToMeta(pMeta, super, true); + tsdbAddTableToMeta(pMeta, table, true); + + // Write to meta file + int bufLen = 0; + if (newSuper) { + void *buf = tsdbEncodeTable(super, &bufLen); + tsdbInsertMetaRecord(pMeta->mfh, super->tableId.uid, buf, bufLen); + tsdbFreeEncode(buf); } - pTable->content.pData = tSkipListCreate(TSDB_SUPER_TABLE_SL_LEVEL, 0, 8, 0, 0, NULL); - if (newSuper) tsdbAddTableToMeta(pMeta, pSTable); - tsdbAddTableToMeta(pMeta, pTable); + void *buf = tsdbEncodeTable(table, &bufLen); + tsdbInsertMetaRecord(pMeta->mfh, table->tableId.uid, buf, bufLen); + tsdbFreeEncode(buf); return 0; } -STsdbMeta *tsdbOpenMeta(char *tsdbDir) { - // TODO : Open meta file for reading - - STsdbMeta *pMeta = (STsdbMeta *)malloc(sizeof(STsdbMeta)); - if (pMeta == NULL) { - return NULL; - } - - return pMeta; -} - /** * Check if a table is valid to insert. 
* @return NULL for invalid and the pointer to the table if valid @@ -165,7 +289,7 @@ int32_t tsdbDropTableImpl(STsdbMeta *pMeta, STableId tableId) { pMeta->tables[pTable->tableId.tid] = NULL; pMeta->nTables--; assert(pMeta->nTables >= 0); - if (pTable->type == TSDB_STABLE) { + if (pTable->type == TSDB_CHILD_TABLE) { tsdbRemoveTableFromIndex(pMeta, pTable); } @@ -182,10 +306,10 @@ int32_t tsdbInsertRowToTableImpl(SSkipListNode *pNode, STable *pTable) { static int tsdbFreeTable(STable *pTable) { // TODO: finish this function - if (pTable->type == TSDB_STABLE) { - tdFreeDataRow(pTable->pTagVal); + if (pTable->type == TSDB_CHILD_TABLE) { + tdFreeDataRow(pTable->tagVal); } else { - tdFreeSchema(pTable->pSchema); + tdFreeSchema(pTable->schema); } // Free content @@ -205,28 +329,28 @@ static int32_t tsdbCheckTableCfg(STableCfg *pCfg) { } STable *tsdbGetTableByUid(STsdbMeta *pMeta, int64_t uid) { - void *ptr = taosGetDataFromHashTable(pMeta->tableMap, (char *)(&uid), sizeof(uid)); + void *ptr = taosHashGet(pMeta->map, (char *)(&uid), sizeof(uid)); if (ptr == NULL) return NULL; return *(STable **)ptr; } -static int tsdbAddTableToMeta(STsdbMeta *pMeta, STable *pTable) { +static int tsdbAddTableToMeta(STsdbMeta *pMeta, STable *pTable, bool addIdx) { if (pTable->type == TSDB_SUPER_TABLE) { // add super table to the linked list - if (pMeta->stables == NULL) { - pMeta->stables = pTable; + if (pMeta->superList == NULL) { + pMeta->superList = pTable; pTable->next = NULL; } else { - STable *pTemp = pMeta->stables; - pMeta->stables = pTable; + STable *pTemp = pMeta->superList; + pMeta->superList = pTable; pTable->next = pTemp; } } else { // add non-super table to the array pMeta->tables[pTable->tableId.tid] = pTable; - if (pTable->type == TSDB_STABLE) { + if (pTable->type == TSDB_CHILD_TABLE) { // add STABLE to the index tsdbAddTableIntoIndex(pMeta, pTable); } @@ -244,19 +368,44 @@ static int tsdbRemoveTableFromMeta(STsdbMeta *pMeta, STable *pTable) { static int 
tsdbAddTableIntoMap(STsdbMeta *pMeta, STable *pTable) { // TODO: add the table to the map int64_t uid = pTable->tableId.uid; - if (taosAddToHashTable(pMeta->tableMap, (char *)(&uid), sizeof(uid), (void *)(&pTable), sizeof(pTable)) < 0) { + if (taosHashPut(pMeta->map, (char *)(&uid), sizeof(uid), (void *)(&pTable), sizeof(pTable)) < 0) { return -1; } return 0; } static int tsdbAddTableIntoIndex(STsdbMeta *pMeta, STable *pTable) { - assert(pTable->type == TSDB_STABLE); + assert(pTable->type == TSDB_CHILD_TABLE); // TODO return 0; } static int tsdbRemoveTableFromIndex(STsdbMeta *pMeta, STable *pTable) { - assert(pTable->type == TSDB_STABLE); + assert(pTable->type == TSDB_CHILD_TABLE); // TODO return 0; +} + +static int tsdbEstimateTableEncodeSize(STable *pTable) { + int size = 0; + size += T_MEMBER_SIZE(STable, type); + size += T_MEMBER_SIZE(STable, tableId); + size += T_MEMBER_SIZE(STable, superUid); + size += T_MEMBER_SIZE(STable, sversion); + + if (pTable->type == TSDB_SUPER_TABLE) { + size += tdGetSchemaEncodeSize(pTable->schema); + size += tdGetSchemaEncodeSize(pTable->tagSchema); + } else if (pTable->type == TSDB_CHILD_TABLE) { + size += dataRowLen(pTable->tagVal); + } else { + size += tdGetSchemaEncodeSize(pTable->schema); + } + + return size; +} + +static char *getTupleKey(const void * data) { + SDataRow row = (SDataRow)data; + + return dataRowAt(row, TD_DATA_ROW_HEAD_SIZE); } \ No newline at end of file diff --git a/src/vnode/tsdb/src/tsdbMetaFile.c b/src/vnode/tsdb/src/tsdbMetaFile.c new file mode 100644 index 0000000000000000000000000000000000000000..2a32283c06dd50cf3637745816379e07f51f3b84 --- /dev/null +++ b/src/vnode/tsdb/src/tsdbMetaFile.c @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ +#include "os.h" + +#include "taosdef.h" +#include "hash.h" +#include "tsdbMetaFile.h" + +#define TSDB_META_FILE_VERSION_MAJOR 1 +#define TSDB_META_FILE_VERSION_MINOR 0 +#define TSDB_META_FILE_HEADER_SIZE 512 + +typedef struct { + int32_t offset; + int32_t size; + int64_t uid; +} SRecordInfo; + +static int32_t tsdbGetMetaFileName(char *rootDir, char *fname); +static int32_t tsdbCheckMetaHeader(int fd); +static int32_t tsdbWriteMetaHeader(int fd); +static int tsdbCreateMetaFile(char *fname); +static int tsdbRestoreFromMetaFile(char *fname, SMetaFile *mfh); + +SMetaFile *tsdbInitMetaFile(char *rootDir, int32_t maxTables, iterFunc iFunc, afterFunc aFunc, void *appH) { + char fname[128] = "\0"; + if (tsdbGetMetaFileName(rootDir, fname) < 0) return NULL; + + SMetaFile *mfh = (SMetaFile *)calloc(1, sizeof(SMetaFile)); + if (mfh == NULL) return NULL; + + mfh->iFunc = iFunc; + mfh->aFunc = aFunc; + mfh->appH = appH; + mfh->nDel = 0; + mfh->tombSize = 0; + mfh->size = 0; + + // OPEN MAP + mfh->map = + taosHashInit(maxTables * TSDB_META_HASH_FRACTION, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false); + if (mfh->map == NULL) { + free(mfh); + return NULL; + } + + // OPEN FILE + if (access(fname, F_OK) < 0) { // file not exists + mfh->fd = tsdbCreateMetaFile(fname); + if (mfh->fd < 0) { + taosHashCleanup(mfh->map); + free(mfh); + return NULL; + } + mfh->size += TSDB_META_FILE_HEADER_SIZE; + } else { // file exists, recover from file + if (tsdbRestoreFromMetaFile(fname, mfh) < 0) { + taosHashCleanup(mfh->map); + free(mfh); + return NULL; + } + } + + return mfh; +} + +int32_t tsdbInsertMetaRecord(SMetaFile *mfh, int64_t uid, void *cont, int32_t 
contLen) { + if (taosHashGet(mfh->map, (char *)(&uid), sizeof(uid)) != NULL) { + return -1; + } + + SRecordInfo info; + info.offset = mfh->size; + info.size = contLen; + info.uid = uid; + + mfh->size += (contLen + sizeof(SRecordInfo)); + + if (taosHashPut(mfh->map, (char *)(&uid), sizeof(uid), (void *)(&info), sizeof(SRecordInfo)) < 0) { + return -1; + } + + // TODO: make below a function to implement + if (lseek(mfh->fd, info.offset, SEEK_SET) < 0) { + return -1; + } + + if (write(mfh->fd, (void *)(&info), sizeof(SRecordInfo)) < 0) { + return -1; + } + + if (write(mfh->fd, cont, contLen) < 0) { + return -1; + } + + fsync(mfh->fd); + + mfh->tombSize++; + + return 0; +} + +int32_t tsdbDeleteMetaRecord(SMetaFile *mfh, int64_t uid) { + char *ptr = taosHashGet(mfh->map, (char *)(&uid), sizeof(uid)); + if (ptr == NULL) return -1; + + SRecordInfo info = *(SRecordInfo *)ptr; + + // Remove record from hash table + taosHashRemove(mfh->map, (char *)(&uid), sizeof(uid)); + + // Remove record from file + + info.offset = -info.offset; + if (lseek(mfh->fd, -info.offset, SEEK_CUR) < 0) { + return -1; + } + + if (write(mfh->fd, (void *)(&info), sizeof(SRecordInfo)) < 0) { + return -1; + } + + fsync(mfh->fd); + + mfh->nDel++; + + return 0; +} + +int32_t tsdbUpdateMetaRecord(SMetaFile *mfh, int64_t uid, void *cont, int32_t contLen) { + char *ptr = taosHashGet(mfh->map, (char *)(&uid), sizeof(uid)); + if (ptr == NULL) return -1; + + SRecordInfo info = *(SRecordInfo *)ptr; + // Update the hash table + if (taosHashPut(mfh->map, (char *)(&uid), sizeof(uid), (void *)(&info), sizeof(SRecordInfo)) < 0) { + return -1; + } + + // Update record in file + if (info.size >= contLen) { // Just update it in place + info.size = contLen; + + } else { // Just append to the end of file + info.offset = mfh->size; + info.size = contLen; + + mfh->size += contLen; + } + if (lseek(mfh->fd, -info.offset, SEEK_CUR) < 0) { + return -1; + } + + if (write(mfh->fd, (void *)(&info), sizeof(SRecordInfo)) < 0) { + 
return -1; + } + + fsync(mfh->fd); + + return 0; +} + +void tsdbCloseMetaFile(SMetaFile *mfh) { + if (mfh == NULL) return; + close(mfh); + + taosHashCleanup(mfh->map); +} + +static int32_t tsdbGetMetaFileName(char *rootDir, char *fname) { + if (rootDir == NULL) return -1; + sprintf(fname, "%s/%s", rootDir, TSDB_META_FILE_NAME); + return 0; +} + +static int32_t tsdbCheckMetaHeader(int fd) { + // TODO: write the meta file header check function + return 0; +} + +static int32_t tsdbWriteMetaHeader(int fd) { + // TODO: write the meta file header to file + char head[TSDB_META_FILE_HEADER_SIZE] = "\0"; + sprintf(head, "version: %d.%d", TSDB_META_FILE_VERSION_MAJOR, TSDB_META_FILE_VERSION_MINOR); + + write(fd, (void *)head, TSDB_META_FILE_HEADER_SIZE); + return 0; +} + +static int32_t tsdbReadMetaHeader(int fd) { + lseek(fd, TSDB_META_FILE_HEADER_SIZE, SEEK_SET); + return 0; +} + +static int tsdbCreateMetaFile(char *fname) { + int fd = open(fname, O_RDWR | O_CREAT, 0755); + if (fd < 0) return -1; + + if (tsdbWriteMetaHeader(fd) < 0) { + close(fd); + return NULL; + } + + return fd; +} + +static int tsdbCheckMetaFileIntegrety(int fd) { + // TODO + return 0; +} + +static int tsdbRestoreFromMetaFile(char *fname, SMetaFile *mfh) { + int fd = open(fname, O_RDWR); + if (fd < 0) return -1; + + if (tsdbCheckMetaFileIntegrety(fd) < 0) { + // TODO: decide if to auto-recover the file + close(fd); + return -1; + } + + if (lseek(fd, TSDB_META_FILE_HEADER_SIZE, SEEK_SET) < 0) { + // TODO: deal with the error + close(fd); + return -1; + } + + mfh->size += TSDB_META_FILE_HEADER_SIZE; + + mfh->fd = fd; + + void *buf = NULL; + int buf_size = 0; + + SRecordInfo info; + while (1) { + if (read(mfh->fd, (void *)(&info), sizeof(SRecordInfo)) == 0) break; + if (info.offset < 0) { + mfh->size += (info.size + sizeof(SRecordInfo)); + mfh->tombSize += (info.size + sizeof(SRecordInfo)); + lseek(mfh->fd, info.size, SEEK_CUR); + mfh->size = mfh->size + sizeof(SRecordInfo) + info.size; + mfh->tombSize = 
mfh->tombSize + sizeof(SRecordInfo) + info.size; + } else { + if (taosHashPut(mfh->map, (char *)(&info.uid), sizeof(info.uid), (void *)(&info), sizeof(SRecordInfo)) < 0) { + if (buf) free(buf); + return -1; + } + + buf = realloc(buf, info.size); + if (buf == NULL) return -1; + + if (read(mfh->fd, buf, info.size) < 0) { + if (buf) free(buf); + return -1; + } + (*mfh->iFunc)(mfh->appH, buf, info.size); + + mfh->size = mfh->size + sizeof(SRecordInfo) + info.size; + } + + } + (*mfh->aFunc)(mfh->appH); + + if (buf) free(buf); + + return 0; +} \ No newline at end of file diff --git a/src/vnode/tsdb/src/tsdbRead.c b/src/vnode/tsdb/src/tsdbRead.c index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..a62299c45fa83b7e021e2e91863e10eea9126046 100644 --- a/src/vnode/tsdb/src/tsdbRead.c +++ b/src/vnode/tsdb/src/tsdbRead.c @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "os.h" +#include "tsdb.h" + +tsdb_query_handle_t *tsdbQueryByTableId(STsdbQueryCond *pCond, SArray *idList, SArray *pColumnInfo) { + +} + +bool tsdbNextDataBlock(tsdb_query_handle_t *pQueryHandle) { + return false; +} + +SDataBlockInfo tsdbRetrieveDataBlockInfo(tsdb_query_handle_t *pQueryHandle) { + +} + +int32_t tsdbRetrieveDataBlockStatisInfo(tsdb_query_handle_t *pQueryHandle, SDataStatis **pBlockStatis) { + +} + +SArray *tsdbRetrieveDataBlock(tsdb_query_handle_t *pQueryHandle, SArray *pIdList) { + +} + +int32_t tsdbResetQuery(tsdb_query_handle_t *pQueryHandle, STimeWindow* window, tsdbpos_t position, int16_t order) { + +} + +int32_t tsdbDataBlockSeek(tsdb_query_handle_t *pQueryHandle, tsdbpos_t pos) { + +} + +tsdbpos_t tsdbDataBlockTell(tsdb_query_handle_t *pQueryHandle) { + return NULL; +} + +SArray *tsdbRetrieveDataRow(tsdb_query_handle_t *pQueryHandle, SArray *pIdList, SQueryRowCond *pCond) { + +} + +tsdb_query_handle_t *tsdbQueryFromTagConds(STsdbQueryCond *pCond, int16_t stableId, const char *pTagFilterStr) { + +} + +STableIDList *tsdbGetTableList(tsdb_query_handle_t *pQueryHandle) { + +} + +STableIDList *tsdbQueryTableList(int16_t stableId, const char *pTagCond) { + +} + diff --git a/src/vnode/tsdb/tests/tsdbTests.cpp b/src/vnode/tsdb/tests/tsdbTests.cpp index 737deee3c5a7e1fd732a5b77b80c00db4f966a29..42a22553c7511160c189ab1b5415e29f45dd383e 100644 --- a/src/vnode/tsdb/tests/tsdbTests.cpp +++ b/src/vnode/tsdb/tests/tsdbTests.cpp @@ -3,92 +3,119 @@ #include "tsdb.h" #include "dataformat.h" +#include "tsdbFile.h" #include "tsdbMeta.h" -TEST(TsdbTest, createTable) { - STsdbMeta *pMeta = tsdbCreateMeta(100); - ASSERT_NE(pMeta, nullptr); - - STableCfg config; - config.tableId.tid = 0; - config.tableId.uid = 98868728187539L; - config.numOfCols = 5; - config.schema = tdNewSchema(config.numOfCols); - for (int i = 0; i < schemaNCols(config.schema); i++) { - STColumn *pCol = tdNewCol(TSDB_DATA_TYPE_BIGINT, i, 0); - 
tdColCpy(schemaColAt(config.schema, i), pCol); - tdFreeCol(pCol); +TEST(TsdbTest, tableEncodeDecode) { + STable *pTable = (STable *)malloc(sizeof(STable)); + + pTable->type = TSDB_NORMAL_TABLE; + pTable->tableId.uid = 987607499877672L; + pTable->tableId.tid = 0; + pTable->superUid = -1; + pTable->sversion = 0; + pTable->tagSchema = NULL; + pTable->tagVal = NULL; + int nCols = 5; + STSchema *schema = tdNewSchema(nCols); + + for (int i = 0; i < nCols; i++) { + if (i == 0) { + tdSchemaAppendCol(schema, TSDB_DATA_TYPE_TIMESTAMP, i, -1); + } else { + tdSchemaAppendCol(schema, TSDB_DATA_TYPE_INT, i, -1); + } } - config.tagValues = nullptr; - tsdbCreateTableImpl(pMeta, &config); + pTable->schema = schema; - STable *pTable = tsdbGetTableByUid(pMeta, config.tableId.uid); - ASSERT_NE(pTable, nullptr); + int bufLen = 0; + void *buf = tsdbEncodeTable(pTable, &bufLen); + + STable *tTable = tsdbDecodeTable(buf, bufLen); + + ASSERT_EQ(pTable->type, tTable->type); + ASSERT_EQ(pTable->tableId.uid, tTable->tableId.uid); + ASSERT_EQ(pTable->tableId.tid, tTable->tableId.tid); + ASSERT_EQ(pTable->superUid, tTable->superUid); + ASSERT_EQ(pTable->sversion, tTable->sversion); + ASSERT_EQ(memcmp(pTable->schema, tTable->schema, sizeof(STSchema) + sizeof(STColumn) * nCols), 0); + ASSERT_EQ(tTable->content.pData, nullptr); } TEST(TsdbTest, createRepo) { - STsdbCfg *pCfg = tsdbCreateDefaultCfg(); + STsdbCfg config; - // Create a tsdb repository - tsdb_repo_t *pRepo = tsdbCreateRepo("/root/mnt/test/vnode0", pCfg, NULL); + // 1. 
Create a tsdb repository + tsdbSetDefaultCfg(&config); + tsdb_repo_t *pRepo = tsdbCreateRepo("/home/ubuntu/work/ttest/vnode0", &config, NULL); ASSERT_NE(pRepo, nullptr); - tsdbFreeCfg(pCfg); - - // create a normal table in this repository - STableCfg config; - config.tableId.tid = 0; - config.tableId.uid = 98868728187539L; - config.numOfCols = 5; - config.schema = tdNewSchema(config.numOfCols); - STColumn *pCol = tdNewCol(TSDB_DATA_TYPE_TIMESTAMP, 0, 0); - tdColCpy(schemaColAt(config.schema, 0), pCol); - tdFreeCol(pCol); - for (int i = 1; i < schemaNCols(config.schema); i++) { - pCol = tdNewCol(TSDB_DATA_TYPE_BIGINT, i, 0); - tdColCpy(schemaColAt(config.schema, i), pCol); - tdFreeCol(pCol); - } - tsdbCreateTable(pRepo, &config); - // Write some data + // 2. Create a normal table + STableCfg tCfg; + ASSERT_EQ(tsdbInitTableCfg(&tCfg, TSDB_SUPER_TABLE, 987607499877672L, 0), -1); + ASSERT_EQ(tsdbInitTableCfg(&tCfg, TSDB_NORMAL_TABLE, 987607499877672L, 0), 0); - // int32_t size = sizeof(SSubmitMsg) + sizeof(SSubmitBlock) + tdMaxRowDataBytes(config.schema) * 10 + sizeof(int32_t); + int nCols = 5; + STSchema *schema = tdNewSchema(nCols); - // tdUpdateSchema(config.schema); - - // SSubmitMsg *pMsg = (SSubmitMsg *)malloc(size); - // pMsg->numOfTables = 1; // TODO: use api + for (int i = 0; i < nCols; i++) { + if (i == 0) { + tdSchemaAppendCol(schema, TSDB_DATA_TYPE_TIMESTAMP, i, -1); + } else { + tdSchemaAppendCol(schema, TSDB_DATA_TYPE_INT, i, -1); + } + } - // SSubmitBlock *pBlock = (SSubmitBlock *)pMsg->data; - // pBlock->tableId = {.uid = 98868728187539L, .tid = 0}; - // pBlock->sversion = 0; - // pBlock->len = sizeof(SSubmitBlock); + tsdbTableSetSchema(&tCfg, schema, true); + + tsdbCreateTable(pRepo, &tCfg); + + // // 3. 
Loop to write some simple data + int nRows = 100; + int rowsPerSubmit = 10; + int64_t start_time = 1584081000000; + + SSubmitMsg *pMsg = (SSubmitMsg *)malloc(sizeof(SSubmitMsg) + sizeof(SSubmitBlk) + tdMaxRowBytesFromSchema(schema) * rowsPerSubmit); + + for (int k = 0; k < nRows/rowsPerSubmit; k++) { + SSubmitBlk *pBlock = pMsg->blocks; + pBlock->tableId = {.uid = 987607499877672L, .tid = 0}; + pBlock->sversion = 0; + pBlock->len = 0; + for (int i = 0; i < rowsPerSubmit; i++) { + start_time += 1000; + SDataRow row = (SDataRow)(pBlock->data + pBlock->len); + tdInitDataRow(row, schema); + + for (int j = 0; j < schemaNCols(schema); j++) { + if (j == 0) { // Just for timestamp + tdAppendColVal(row, (void *)(&start_time), schemaColAt(schema, j)); + } else { // For int + int val = 10; + tdAppendColVal(row, (void *)(&val), schemaColAt(schema, j)); + } + } + pBlock->len += dataRowLen(row); + } + pMsg->length = pMsg->length + sizeof(SSubmitBlk) + pBlock->len; + + tsdbInsertData(pRepo, pMsg); + } - // SDataRows rows = pBlock->data; - // dataRowsInit(rows); + tsdbTriggerCommit(pRepo); - // SDataRow row = tdNewDataRow(tdMaxRowDataBytes(config.schema)); - // int64_t ttime = 1583508800000; - // for (int i = 0; i < 10; i++) { // loop over rows - // ttime += (10000 * i); - // tdDataRowReset(row); - // for (int j = 0; j < schemaNCols(config.schema); j++) { - // if (j == 0) { // set time stamp - // tdAppendColVal(row, (void *)(&ttime), schemaColAt(config.schema, j), 40); - // } else { // set other fields - // int32_t val = 10; - // tdAppendColVal(row, (void *)(&val), schemaColAt(config.schema, j), 40); - // } - // } +} - // tdDataRowsAppendRow(rows, row); - // } +TEST(TsdbTest, openRepo) { + tsdb_repo_t *pRepo = tsdbOpenRepo("/home/ubuntu/work/ttest/vnode0"); + ASSERT_NE(pRepo, nullptr); +} - // tsdbInsertData(pRepo, pMsg); +TEST(TsdbTest, createFileGroup) { + SFileGroup fGroup; - // tdFreeDataRow(row); + ASSERT_EQ(tsdbCreateFileGroup("/home/ubuntu/work/ttest/vnode0/data", 1820, 
&fGroup, 1000), 0); - tdFreeSchema(config.schema); - tsdbDropRepo(pRepo); + int k = 0; } \ No newline at end of file diff --git a/tests/examples/c/CMakeLists.txt b/tests/examples/c/CMakeLists.txt index 287fca7d410b88d240642a57ec194b3d0c686975..81d912fc00d9d543cbee9835ef6c791b1b67542f 100644 --- a/tests/examples/c/CMakeLists.txt +++ b/tests/examples/c/CMakeLists.txt @@ -1,13 +1,13 @@ PROJECT(TDengine) IF (TD_WINDOWS_64) - INCLUDE_DIRECTORIES(${TD_ROOT_DIR}/deps/pthread) + INCLUDE_DIRECTORIES(${TD_COMMUNITY_DIR}/deps/pthread) ENDIF () -INCLUDE_DIRECTORIES(. ${TD_ROOT_DIR}/src/inc ${TD_ROOT_DIR}/src/client/inc ${TD_OS_DIR}/inc) +INCLUDE_DIRECTORIES(. ${TD_COMMUNITY_DIR}/src/inc ${TD_COMMUNITY_DIR}/src/client/inc ${TD_COMMUNITY_DIR}/inc) AUX_SOURCE_DIRECTORY(. SRC) -#ADD_EXECUTABLE(demo ${SRC}) -#TARGET_LINK_LIBRARIES(demo taos_static trpc tutil pthread ) +ADD_EXECUTABLE(demo demo.c) +TARGET_LINK_LIBRARIES(demo taos_static trpc tutil pthread ) diff --git a/tests/examples/c/demo.c b/tests/examples/c/demo.c index a0c4588e6f38a6c6e8f87ec2a984720833f3df3b..dc16185d9a1b74cad4492c09f48dc3437772e16e 100644 --- a/tests/examples/c/demo.c +++ b/tests/examples/c/demo.c @@ -35,6 +35,7 @@ int main(int argc, char *argv[]) { return 0; } + taos_options(TSDB_OPTION_CONFIGDIR, "~/first/cfg"); // init TAOS taos_init(); @@ -44,8 +45,27 @@ int main(int argc, char *argv[]) { exit(1); } printf("success to connect to server\n"); - + int32_t code = taos_query(taos, "select * from test.t1"); + if (code != 0) { + printf("failed to execute query, reason:%s\n", taos_errstr(taos)); + } + + TAOS_RES* res = taos_use_result(taos); + TAOS_ROW row = NULL; + char buf[512] = {0}; + + int32_t numOfFields = taos_num_fields(res); + TAOS_FIELD* pFields = taos_fetch_fields(res); + + while((row = taos_fetch_row(res)) != NULL) { + taos_print_row(buf, row, pFields, numOfFields); + printf("%s\n", buf); + memset(buf, 0, 512); + } + + return 0; + taos_query(taos, "drop database demo"); if (taos_query(taos, "create 
database demo") != 0) { printf("failed to create database, reason:%s\n", taos_errstr(taos)); @@ -90,7 +110,7 @@ int main(int argc, char *argv[]) { exit(1); } - TAOS_ROW row; +// TAOS_ROW row; int rows = 0; int num_fields = taos_field_count(taos); TAOS_FIELD *fields = taos_fetch_fields(result);