diff --git a/docs/en/12-taos-sql/14-stream.md b/docs/en/12-taos-sql/14-stream.md index 17e4e4d1b0da6d0461c9ab478a9430855379fb12..c47d2da0ebf8b8d3ecc2e51fad659ceb423ad46f 100644 --- a/docs/en/12-taos-sql/14-stream.md +++ b/docs/en/12-taos-sql/14-stream.md @@ -10,7 +10,7 @@ Because stream processing is built in to TDengine, you are no longer reliant on ## Create a Stream ```sql -CREATE STREAM [IF NOT EXISTS] stream_name [stream_options] INTO stb_name AS subquery +CREATE STREAM [IF NOT EXISTS] stream_name [stream_options] INTO stb_name SUBTABLE(expression) AS subquery stream_options: { TRIGGER [AT_ONCE | WINDOW_CLOSE | MAX_DELAY time] WATERMARK time @@ -30,6 +30,8 @@ subquery: SELECT [DISTINCT] select_list Session windows, state windows, and sliding windows are supported. When you configure a session or state window for a supertable, you must use PARTITION BY TBNAME. +The SUBTABLE clause defines the naming rule for automatically created subtables. For details, see the section Partitions of Stream below. + ```sql window_clause: { SESSION(ts_col, tol_val) @@ -47,6 +49,47 @@ CREATE STREAM avg_vol_s INTO avg_vol AS SELECT _wstart, count(*), avg(voltage) FROM meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s); ``` +## Partitions of Stream + +A stream can process data in multiple partitions. Partition rules are defined by the PARTITION BY clause of the stream. Each partition has its own timeline and windows, is processed separately, and is written into a different subtable of the target supertable. + +If a stream is created without a PARTITION BY clause, all data is written into one subtable. + +If a stream is created with a PARTITION BY clause but without a SUBTABLE clause, the subtable for each partition is given a random name. + +If a stream is created with both a PARTITION BY clause and a SUBTABLE clause, the name of the subtable for each partition is computed from the SUBTABLE clause. 
For example: + +```sql +CREATE STREAM avg_vol_s INTO avg_vol SUBTABLE(CONCAT('new-', tname)) AS SELECT _wstart, count(*), avg(voltage) FROM meters PARTITION BY tbname tname INTERVAL(1m); +``` + +In the PARTITION BY clause, 'tbname', which represents the name of each subtable of the source supertable, is given the alias 'tname', and 'tname' is then used in the SUBTABLE clause. In this SUBTABLE clause, the name of each automatically created subtable is the concatenation of 'new-' and the name of the source subtable. Other expressions are also allowed in the SUBTABLE clause, but the output type must be varchar. + +If the length of the generated name exceeds the TDengine limit (192), the name will be truncated. If the generated name is already used by another table, creating the new subtable and writing data to it will fail. + +## Filling history data + +Normally, a stream does not process data that was already written to the source table before the stream was created, or that is written while the stream is being created. Adding FILL_HISTORY 1 as a stream option when creating the stream allows it to process data written before and during its creation. For example: + +```sql +create stream if not exists s1 fill_history 1 into st1 as select count(*) from t1 interval(10s) +``` + +By combining the fill_history option with a WHERE clause, a stream can process data in a specific time range. For example, the following stream processes only data written after a past point in time (in this case, 2020-01-30): + +```sql +create stream if not exists s1 fill_history 1 into st1 as select count(*) from t1 where ts > '2020-01-30' interval(10s) +``` + +As another example, the following stream processes only data starting from a past point in time and ending at a future point in time: + +```sql +create stream if not exists s1 fill_history 1 into st1 as select count(*) from t1 where ts > '2020-01-30' and ts < '2023-01-01' interval(10s) +``` + +If a stream is completely outdated and you no longer want it to monitor or process data, you can drop it manually; the data it has already output is retained. 
+ + ## Delete a Stream ```sql diff --git a/docs/zh/12-taos-sql/14-stream.md b/docs/zh/12-taos-sql/14-stream.md index 932ad30b1a949d172d81819f2432daa42ce331c8..a70d559a860c4e6ec44cbd13c34f7f306ab452cb 100644 --- a/docs/zh/12-taos-sql/14-stream.md +++ b/docs/zh/12-taos-sql/14-stream.md @@ -8,7 +8,7 @@ description: 流式计算的相关 SQL 的详细语法 ## 创建流式计算 ```sql -CREATE STREAM [IF NOT EXISTS] stream_name [stream_options] INTO stb_name AS subquery +CREATE STREAM [IF NOT EXISTS] stream_name [stream_options] INTO stb_name SUBTABLE(expression) AS subquery stream_options: { TRIGGER [AT_ONCE | WINDOW_CLOSE | MAX_DELAY time] WATERMARK time @@ -28,6 +28,9 @@ subquery: SELECT select_list 支持会话窗口、状态窗口与滑动窗口,其中,会话窗口与状态窗口搭配超级表时必须与partition by tbname一起使用 + +subtable 子句定义了流式计算中创建的子表的命名规则,详见 流式计算的 partition 部分。 + ```sql window_clause: { SESSION(ts_col, tol_val) @@ -49,11 +52,43 @@ SELECT _wstart, count(*), avg(voltage) FROM meters PARTITION BY tbname INTERVAL( ## 流式计算的 partition -可以使用 PARTITION BY TBNAME 或 PARTITION BY tag,对一个流进行多分区的计算,每个分区的时间线与时间窗口是独立的,会各自聚合,并写入到目的表中的不同子表。 +可以使用 PARTITION BY TBNAME,tag,普通列或者表达式,对一个流进行多分区的计算,每个分区的时间线与时间窗口是独立的,会各自聚合,并写入到目的表中的不同子表。 + +不带 PARTITION BY 子句时,所有的数据将写入到一张子表。 + +在创建流时不使用 SUBTABLE 子句时,流式计算创建的超级表有唯一的 tag 列 groupId,每个 partition 会被分配唯一 groupId。与 schemaless 写入一致,我们通过 MD5 计算子表名,并自动创建它。 + +若创建流的语句中包含 SUBTABLE 子句,用户可以为每个 partition 对应的子表生成自定义的表名,例如: + +```sql +CREATE STREAM avg_vol_s INTO avg_vol SUBTABLE(CONCAT('new-', tname)) AS SELECT _wstart, count(*), avg(voltage) FROM meters PARTITION BY tbname tname INTERVAL(1m); +``` + +PARTITION 子句中,为 tbname 定义了一个别名 tname, 在PARTITION 子句中的别名可以用于 SUBTABLE 子句中的表达式计算,在上述示例中,流新创建的子表将以前缀 'new-' 连接原表名作为表名。 + +注意,子表名的长度若超过 TDengine 的限制,将被截断。若要生成的子表名已经存在于另一超级表,由于 TDengine 的子表名是唯一的,因此对应新子表的创建以及数据的写入将会失败。 + +## 流式计算读取历史数据 -不带 PARTITION BY 选项时,所有的数据将写入到一张子表。 +正常情况下,流式计算不会处理创建前已经写入源表中的数据,若要处理已经写入的数据,可以在创建流时设置 fill_history 1 选项,这样创建的流式计算会自动处理创建前、创建中、创建后写入的数据。例如: + +```sql +create stream if not exists s1 fill_history 1 into st1 as select 
count(*) from t1 interval(10s) +``` + +结合 fill_history 1 选项,可以实现只处理特定历史时间范围的数据,例如:只处理某历史时刻(2020年1月30日)之后的数据 + +```sql +create stream if not exists s1 fill_history 1 into st1 as select count(*) from t1 where ts > '2020-01-30' interval(10s) +``` + +再如,仅处理某时间段内的数据,结束时间可以是未来时间 + +```sql +create stream if not exists s1 fill_history 1 into st1 as select count(*) from t1 where ts > '2020-01-30' and ts < '2023-01-01' interval(10s) +``` -流式计算创建的超级表有唯一的 tag 列 groupId,每个 partition 会被分配唯一 groupId。与 schemaless 写入一致,我们通过 MD5 计算子表名,并自动创建它。 +如果该流任务已经彻底过期,并且您不再想让它检测或处理数据,您可以手动删除它,被计算出的数据仍会被保留。 ## 删除流式计算 diff --git a/include/common/tmsg.h b/include/common/tmsg.h index bb1addf1b698479ca418882a1a16c3c59e5347cc..ad6077db098b18d2b10d95058831d4f8c25d046a 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -68,7 +68,7 @@ typedef uint16_t tmsg_t; static inline bool vnodeIsMsgBlock(tmsg_t type) { return (type == TDMT_VND_CREATE_TABLE) || (type == TDMT_VND_ALTER_TABLE) || (type == TDMT_VND_DROP_TABLE) || - (type == TDMT_VND_UPDATE_TAG_VAL); + (type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM); } static inline bool syncUtilUserCommit(tmsg_t msgType) { diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index 4a6f0d14daa177205ea7f6c528fc7a4bfbaed324..31ab1f3259cd45b0223c5962fbca5f27386da57a 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -1126,8 +1126,12 @@ int32_t mndSetMoveVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, } if (!force) { +#if 1 + { +#else if (newVg.replica == 1) { - mInfo("vgId:%d, will add 1 vnode, replca:1", pVgroup->vgId); +#endif + mInfo("vgId:%d, will add 1 vnode, replca:%d", pVgroup->vgId, newVg.replica); if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray) != 0) return -1; for (int32_t i = 0; i < newVg.replica - 1; ++i) { if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId) != 0) return 
-1; @@ -1155,6 +1159,9 @@ int32_t mndSetMoveVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pDb, &newVg, newVg.vnodeGid[i].dnodeId) != 0) return -1; } if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg) != 0) return -1; +#if 1 + } +#else } else { // new replica == 3 mInfo("vgId:%d, will add 1 vnode, replca:3", pVgroup->vgId); if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray) != 0) return -1; @@ -1181,6 +1188,7 @@ int32_t mndSetMoveVgroupInfoToTrans(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, if (mndAddCreateVnodeAction(pMnode, pTrans, pDb, &newVg, &newVg.vnodeGid[vnIndex]) != 0) return -1; if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pDb, &newVg) != 0) return -1; } +#endif } else { mInfo("vgId:%d, will add 1 vnode and force remove 1 vnode", pVgroup->vgId); if (mndAddVnodeToVgroup(pMnode, pTrans, &newVg, pArray) != 0) return -1; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 094468c66682f34d517cbc31f22ae96531e8e373..e56f130c2c4af7a70957e29c34962f0b06331c9e 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -87,17 +87,24 @@ typedef struct SCommitInfo SCommitInfo; #define VNODE_RSMA1_DIR "rsma1" #define VNODE_RSMA2_DIR "rsma2" +#define VND_INFO_FNAME "vnode.json" + // vnd.h void* vnodeBufPoolMalloc(SVBufPool* pPool, int size); void vnodeBufPoolFree(SVBufPool* pPool, void* p); void vnodeBufPoolRef(SVBufPool* pPool); void vnodeBufPoolUnRef(SVBufPool* pPool); +int vnodeDecodeInfo(uint8_t* pData, SVnodeInfo* pInfo); // meta typedef struct SMCtbCursor SMCtbCursor; typedef struct SMStbCursor SMStbCursor; typedef struct STbUidStore STbUidStore; +#define META_BEGIN_HEAP_BUFFERPOOL 0 +#define META_BEGIN_HEAP_OS 1 +#define META_BEGIN_HEAP_NIL 2 + int metaOpen(SVnode* pVnode, SMeta** ppMeta, int8_t rollback); int metaClose(SMeta* pMeta); int metaBegin(SMeta* pMeta, int8_t fromSys); @@ -105,6 
+112,7 @@ TXN* metaGetTxn(SMeta* pMeta); int metaCommit(SMeta* pMeta, TXN* txn); int metaFinishCommit(SMeta* pMeta, TXN* txn); int metaPrepareAsyncCommit(SMeta* pMeta); +int metaAbort(SMeta* pMeta); int metaCreateSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq); int metaAlterSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* pReq); int metaDropSTable(SMeta* pMeta, int64_t verison, SVDropStbReq* pReq, SArray* tbUidList); @@ -238,6 +246,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); // STsdbSnapWriter ======================================== int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter); int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter); int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback); // STqSnapshotReader == int32_t tqSnapReaderOpen(STQ* pTq, int64_t sver, int64_t ever, STqSnapReader** ppReader); diff --git a/source/dnode/vnode/src/meta/metaCommit.c b/source/dnode/vnode/src/meta/metaCommit.c index 5eb27679bb71c26d081b4de22afa1c84bad7b1c9..ac8d99ccf0afbbf1074732ae75fc54e515122f80 100644 --- a/source/dnode/vnode/src/meta/metaCommit.c +++ b/source/dnode/vnode/src/meta/metaCommit.c @@ -19,19 +19,21 @@ static FORCE_INLINE void *metaMalloc(void *pPool, size_t size) { return vnodeBuf static FORCE_INLINE void metaFree(void *pPool, void *p) { vnodeBufPoolFree((SVBufPool *)pPool, p); } // begin a meta txn -int metaBegin(SMeta *pMeta, int8_t fromSys) { - void *(*xMalloc)(void *, size_t); - void (*xFree)(void *, void *); +int metaBegin(SMeta *pMeta, int8_t heap) { + void *(*xMalloc)(void *, size_t) = NULL; + void (*xFree)(void *, void *) = NULL; void *xArg = NULL; - if (fromSys) { + // default heap to META_BEGIN_HEAP_NIL + if (heap == META_BEGIN_HEAP_OS) { xMalloc = tdbDefaultMalloc; xFree = tdbDefaultFree; - } else { + } else if (heap == META_BEGIN_HEAP_BUFFERPOOL) { 
xMalloc = metaMalloc; xFree = metaFree; xArg = pMeta->pVnode->inUse; } + if (tdbBegin(pMeta->pEnv, &pMeta->txn, xMalloc, xFree, xArg, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { return -1; } diff --git a/source/dnode/vnode/src/meta/metaSnapshot.c b/source/dnode/vnode/src/meta/metaSnapshot.c index 6a4dcf6ead7a49f0e4d21b0d1016633fb42d17f4..974f8a92181317db061bbdb0e1c4ad7bfae0dc40 100644 --- a/source/dnode/vnode/src/meta/metaSnapshot.c +++ b/source/dnode/vnode/src/meta/metaSnapshot.c @@ -145,7 +145,7 @@ int32_t metaSnapWriterOpen(SMeta* pMeta, int64_t sver, int64_t ever, SMetaSnapWr pWriter->sver = sver; pWriter->ever = ever; - metaBegin(pMeta, 1); + metaBegin(pMeta, META_BEGIN_HEAP_NIL); *ppWriter = pWriter; return code; @@ -161,7 +161,8 @@ int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback) { SMetaSnapWriter* pWriter = *ppWriter; if (rollback) { - ASSERT(0); + code = metaAbort(pWriter->pMeta); + if (code) goto _err; } else { code = metaCommit(pWriter->pMeta, pWriter->pMeta->txn); if (code) goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 8be49043492054c106f1877f5f06533c6644356e..c61ff343abb24c2d1cd7f16568a0fa74503bb2d5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1376,27 +1376,34 @@ _exit: return code; } +int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter) { + int32_t code = 0; + if (pWriter->dWriter.pWriter) { + code = tsdbSnapWriteCloseFile(pWriter); + if (code) goto _exit; + } + + code = tsdbSnapWriteDelEnd(pWriter); + if (code) goto _exit; + + code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs); + if (code) goto _exit; + +_exit: + if (code) { + tsdbError("vgId:%d %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), __func__, tstrerror(code)); + } + return code; +} + int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { int32_t code = 0; STsdbSnapWriter* pWriter = 
*ppWriter; STsdb* pTsdb = pWriter->pTsdb; if (rollback) { - ASSERT(0); - // code = tsdbFSRollback(pWriter->pTsdb->pFS); - // if (code) goto _err; + tsdbRollbackCommit(pWriter->pTsdb); } else { - if (pWriter->dWriter.pWriter) { - code = tsdbSnapWriteCloseFile(pWriter); - if (code) goto _err; - } - - code = tsdbSnapWriteDelEnd(pWriter); - if (code) goto _err; - - code = tsdbFSPrepareCommit(pWriter->pTsdb, &pWriter->fs); - if (code) goto _err; - // lock taosThreadRwlockWrlock(&pTsdb->rwLock); diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index c96d01f93afd3feeb126f60e2f675fc849856f68..782cc69d30709c16a328cd056a8b52f6b42a5a6f 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -134,9 +134,6 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) { tjsonAddItemToArray(pNodeInfoArr, pNodeInfo); } - // add tsdb page size config - if (tjsonAddIntegerToObject(pJson, "tsdbPageSize", pCfg->tsdbPageSize) < 0) return -1; - return 0; } diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index cd56468371a81b79e4b0a2daa8298fff5bc2287a..4daab074b570a4acffcfaf976b2e7358f697eccd 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -16,11 +16,9 @@ #include "vnd.h" #include "vnodeInt.h" -#define VND_INFO_FNAME "vnode.json" #define VND_INFO_FNAME_TMP "vnode_tmp.json" static int vnodeEncodeInfo(const SVnodeInfo *pInfo, char **ppData); -static int vnodeDecodeInfo(uint8_t *pData, SVnodeInfo *pInfo); static int vnodeCommitImpl(SCommitInfo *pInfo); int vnodeBegin(SVnode *pVnode) { @@ -40,7 +38,7 @@ int vnodeBegin(SVnode *pVnode) { pVnode->state.commitID++; // begin meta - if (metaBegin(pVnode->pMeta, 0) < 0) { + if (metaBegin(pVnode->pMeta, META_BEGIN_HEAP_BUFFERPOOL) < 0) { vError("vgId:%d, failed to begin meta since %s", TD_VID(pVnode), tstrerror(terrno)); return -1; } @@ -407,7 +405,7 @@ _err: return -1; 
} -static int vnodeDecodeInfo(uint8_t *pData, SVnodeInfo *pInfo) { +int vnodeDecodeInfo(uint8_t *pData, SVnodeInfo *pInfo) { SJson *pJson = NULL; pJson = tjsonParse(pData); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index a34744a1da9055f5131ef96afdca0e3e79c9e90e..40705e553b3d22192c8d10b7d2bf292e2dce5fec 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -21,6 +21,8 @@ struct SVSnapReader { int64_t sver; int64_t ever; int64_t index; + // config + int8_t cfgDone; // meta int8_t metaDone; SMetaSnapReader *pMetaReader; @@ -88,6 +90,53 @@ int32_t vnodeSnapReaderClose(SVSnapReader *pReader) { int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) { int32_t code = 0; + // CONFIG ============== + // FIXME: if commit multiple times and the config changed? + if (!pReader->cfgDone) { + char fName[TSDB_FILENAME_LEN]; + if (pReader->pVnode->pTfs) { + snprintf(fName, TSDB_FILENAME_LEN, "%s%s%s%s%s", tfsGetPrimaryPath(pReader->pVnode->pTfs), TD_DIRSEP, + pReader->pVnode->path, TD_DIRSEP, VND_INFO_FNAME); + } else { + snprintf(fName, TSDB_FILENAME_LEN, "%s%s%s", pReader->pVnode->path, TD_DIRSEP, VND_INFO_FNAME); + } + + TdFilePtr pFile = taosOpenFile(fName, TD_FILE_READ); + if (NULL == pFile) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + int64_t size; + if (taosFStatFile(pFile, &size, NULL) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosCloseFile(&pFile); + goto _err; + } + + *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size + 1); + if (*ppData == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + taosCloseFile(&pFile); + goto _err; + } + ((SSnapDataHdr *)(*ppData))->type = SNAP_DATA_CFG; + ((SSnapDataHdr *)(*ppData))->size = size + 1; + ((SSnapDataHdr *)(*ppData))->data[size] = '\0'; + + if (taosReadFile(pFile, ((SSnapDataHdr *)(*ppData))->data, size) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + taosMemoryFree(*ppData); + 
taosCloseFile(&pFile); + goto _err; + } + + taosCloseFile(&pFile); + + pReader->cfgDone = 1; + goto _exit; + } + // META ============== if (!pReader->metaDone) { // open reader if not @@ -230,6 +279,8 @@ struct SVSnapWriter { int64_t ever; int64_t commitID; int64_t index; + // config + SVnodeInfo info; // meta SMetaSnapWriter *pMetaSnapWriter; // tsdb @@ -248,6 +299,10 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWr int32_t code = 0; SVSnapWriter *pWriter = NULL; + // commit memory data + vnodeAsyncCommit(pVnode); + tsem_wait(&pVnode->canCommit); + // alloc pWriter = (SVSnapWriter *)taosMemoryCalloc(1, sizeof(*pWriter)); if (pWriter == NULL) { @@ -258,16 +313,8 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWr pWriter->sver = sver; pWriter->ever = ever; - // commit it - code = vnodeSyncCommit(pVnode); - if (code) { - taosMemoryFree(pWriter); - goto _err; - } - // inc commit ID - pVnode->state.commitID++; - pWriter->commitID = pVnode->state.commitID; + pWriter->commitID = ++pVnode->state.commitID; vInfo("vgId:%d, vnode snapshot writer opened, sver:%" PRId64 " ever:%" PRId64 " commit id:%" PRId64, TD_VID(pVnode), sver, ever, pWriter->commitID); @@ -284,53 +331,89 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * int32_t code = 0; SVnode *pVnode = pWriter->pVnode; + // prepare + if (pWriter->pTsdbSnapWriter) { + tsdbSnapWriterPrepareClose(pWriter->pTsdbSnapWriter); + } + + // commit json + if (!rollback) { + pVnode->config = pWriter->info.config; + pVnode->state = (SVState){.committed = pWriter->info.state.committed, + .applied = pWriter->info.state.committed, + .commitID = pWriter->commitID, + .commitTerm = pWriter->info.state.commitTerm, + .applyTerm = pWriter->info.state.commitTerm}; + pVnode->statis = pWriter->info.statis; + char dir[TSDB_FILENAME_LEN] = {0}; + if (pWriter->pVnode->pTfs) { + snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", 
tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path); + } else { + snprintf(dir, TSDB_FILENAME_LEN, "%s", pWriter->pVnode->path); + } + + vnodeCommitInfo(dir, &pWriter->info); + } else { + vnodeRollback(pWriter->pVnode); + } + + // commit/rollback sub-system if (pWriter->pMetaSnapWriter) { code = metaSnapWriterClose(&pWriter->pMetaSnapWriter, rollback); - if (code) goto _err; + if (code) goto _exit; } if (pWriter->pTsdbSnapWriter) { code = tsdbSnapWriterClose(&pWriter->pTsdbSnapWriter, rollback); - if (code) goto _err; + if (code) goto _exit; } if (pWriter->pRsmaSnapWriter) { code = rsmaSnapWriterClose(&pWriter->pRsmaSnapWriter, rollback); - if (code) goto _err; + if (code) goto _exit; } - if (!rollback) { - SVnodeInfo info = {0}; - char dir[TSDB_FILENAME_LEN]; - - pVnode->state.committed = pWriter->ever; - pVnode->state.applied = pWriter->ever; - pVnode->state.applyTerm = pSnapshot->lastApplyTerm; - pVnode->state.commitTerm = pSnapshot->lastApplyTerm; - - info.config = pVnode->config; - info.state.committed = pVnode->state.applied; - info.state.commitTerm = pVnode->state.applyTerm; - info.state.commitID = pVnode->state.commitID; - snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pVnode->pTfs), TD_DIRSEP, pVnode->path); - code = vnodeSaveInfo(dir, &info); - if (code) goto _err; - - code = vnodeCommitInfo(dir, &info); - if (code) goto _err; - - vnodeBegin(pVnode); - } else { - ASSERT(0); - } + vnodeBegin(pVnode); _exit: - vInfo("vgId:%d, vnode snapshot writer closed, rollback:%d", TD_VID(pVnode), rollback); - taosMemoryFree(pWriter); + if (code) { + vError("vgId:%d, vnode snapshot writer close failed since %s", TD_VID(pWriter->pVnode), tstrerror(code)); + } else { + vInfo("vgId:%d, vnode snapshot writer closed, rollback:%d", TD_VID(pVnode), rollback); + taosMemoryFree(pWriter); + } + tsem_post(&pVnode->canCommit); return code; +} -_err: - vError("vgId:%d, vnode snapshot writer close failed since %s", TD_VID(pWriter->pVnode), tstrerror(code)); 
+static int32_t vnodeSnapWriteInfo(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { + int32_t code = 0; + + SSnapDataHdr *pHdr = (SSnapDataHdr *)pData; + + // decode info + if (vnodeDecodeInfo(pHdr->data, &pWriter->info) < 0) { + code = TSDB_CODE_INVALID_MSG; + goto _exit; + } + + // change some value + pWriter->info.state.commitID = pWriter->commitID; + + // modify info as needed + char dir[TSDB_FILENAME_LEN] = {0}; + if (pWriter->pVnode->pTfs) { + snprintf(dir, TSDB_FILENAME_LEN, "%s%s%s", tfsGetPrimaryPath(pWriter->pVnode->pTfs), TD_DIRSEP, + pWriter->pVnode->path); + } else { + snprintf(dir, TSDB_FILENAME_LEN, "%s", pWriter->pVnode->path); + } + if (vnodeSaveInfo(dir, &pWriter->info) < 0) { + code = terrno; + goto _exit; + } + +_exit: return code; } @@ -347,6 +430,10 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { pHdr->type, nData); switch (pHdr->type) { + case SNAP_DATA_CFG: { + code = vnodeSnapWriteInfo(pWriter, pData, nData); + if (code) goto _err; + } break; case SNAP_DATA_META: { // meta if (pWriter->pMetaSnapWriter == NULL) { diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 0fc42f3744194f7695ff4806c6260ff7082b9c5c..60928881362c563bc6f4755c871624e75e336d5a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -317,11 +317,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp // commit if need if (vnodeShouldCommit(pVnode)) { vInfo("vgId:%d, commit at version %" PRId64, TD_VID(pVnode), version); -#if 0 - vnodeSyncCommit(pVnode); -#else vnodeAsyncCommit(pVnode); -#endif // start a new one if (vnodeBegin(pVnode) < 0) { diff --git a/source/libs/parser/src/parInsertSql.c b/source/libs/parser/src/parInsertSql.c index 36420599b3823339bd5fb5f53d0c4a9c93c47ea4..a6bead89aae3cebcf78e9753df5a90f6568c773f 100644 --- a/source/libs/parser/src/parInsertSql.c +++ b/source/libs/parser/src/parInsertSql.c @@ 
-1411,6 +1411,7 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifOpStmt* pStmt, (*pNumOfRows) = 0; char* pLine = NULL; int64_t readLen = 0; + bool firstLine = (pStmt->fileProcessing == false); pStmt->fileProcessing = false; while (TSDB_CODE_SUCCESS == code && (readLen = taosGetLineFile(pStmt->fp, &pLine)) != -1) { if (('\r' == pLine[readLen - 1]) || ('\n' == pLine[readLen - 1])) { @@ -1418,6 +1419,7 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifOpStmt* pStmt, } if (readLen == 0) { + firstLine = false; continue; } @@ -1431,6 +1433,11 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifOpStmt* pStmt, strtolower(pLine, pLine); const char* pRow = pLine; code = parseOneRow(pCxt, (const char**)&pRow, pDataBuf, &gotRow, &token); + if (code && firstLine) { + firstLine = false; + code = 0; + continue; + } } if (TSDB_CODE_SUCCESS == code && gotRow) { @@ -1442,6 +1449,8 @@ static int32_t parseCsvFile(SInsertParseContext* pCxt, SVnodeModifOpStmt* pStmt, pStmt->fileProcessing = true; break; } + + firstLine = false; } taosMemoryFree(pLine); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index d6ce77193a7c38a670cd704634f91f14c7f60d6a..6a545424fcee50e8a77affb4e92a13052afd27ae 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -233,7 +233,7 @@ int32_t syncSendTimeoutRsp(int64_t rid, int64_t seq) { rpcSendResponse(&rpcMsg); return 0; } else { - sInfo("no rpcinfo to send timeout response, seq:%" PRId64, seq); + sError("no message handle to send timeout response, seq:%" PRId64, seq); return -1; } } diff --git a/source/libs/sync/src/syncRespMgr.c b/source/libs/sync/src/syncRespMgr.c index 049b02d73e6a89344aa0266fc086cc69db943cb5..79a38cad7a55fdcc0a587c48299d155d3f396931 100644 --- a/source/libs/sync/src/syncRespMgr.c +++ b/source/libs/sync/src/syncRespMgr.c @@ -35,11 +35,16 @@ SSyncRespMgr *syncRespMgrCreate(void *data, int64_t ttl) { pObj->seqNum = 0; 
taosThreadMutexInit(&(pObj->mutex), NULL); + SSyncNode *pNode = pObj->data; + sTrace("vgId:%d, create resp manager", pNode->vgId); return pObj; } void syncRespMgrDestroy(SSyncRespMgr *pObj) { if (pObj != NULL) { + SSyncNode *pNode = pObj->data; + sTrace("vgId:%d, destroy resp manager", pNode->vgId); + taosThreadMutexLock(&pObj->mutex); taosHashCleanup(pObj->pRespHash); taosThreadMutexUnlock(&pObj->mutex); @@ -81,6 +86,8 @@ int32_t syncRespMgrGet(SSyncRespMgr *pObj, uint64_t seq, SRespStub *pStub) { taosThreadMutexUnlock(&pObj->mutex); return 1; // get one object + } else { + sNError(pObj->data, "get message handle, no object of seq:%" PRIu64, seq); } taosThreadMutexUnlock(&pObj->mutex); @@ -99,6 +106,8 @@ int32_t syncRespMgrGetAndDel(SSyncRespMgr *pObj, uint64_t seq, SRpcHandleInfo *p taosThreadMutexUnlock(&pObj->mutex); return 1; // get one object + } else { + sNError(pObj->data, "get-and-del message handle, no object of seq:%" PRIu64, seq); } taosThreadMutexUnlock(&pObj->mutex); @@ -114,7 +123,7 @@ static void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { SArray *delIndexArray = taosArrayInit(4, sizeof(uint64_t)); if (delIndexArray == NULL) return; - sDebug("vgId:%d, resp mgr begin clean by ttl", pSyncNode->vgId); + sDebug("vgId:%d, resp manager begin clean by ttl", pSyncNode->vgId); while (pStub) { size_t len; void *key = taosHashGetKey(pStub, &len); @@ -143,34 +152,39 @@ static void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) { // TODO: and make rpcMsg body, call commit cb // pSyncNode->pFsm->FpCommitCb(pSyncNode->pFsm, &pStub->rpcMsg, cbMeta); - - pStub->rpcMsg.code = TSDB_CODE_SYN_NOT_LEADER; - if (pStub->rpcMsg.info.handle != NULL) { - tmsgSendRsp(&pStub->rpcMsg); - } + SRpcMsg rpcMsg = {.info = pStub->rpcMsg.info, .code = TSDB_CODE_SYN_TIMEOUT}; + sInfo("vgId:%d, message handle:%p expired, type:%s ahandle:%p", pSyncNode->vgId, rpcMsg.info.handle, + TMSG_INFO(pStub->rpcMsg.msgType), rpcMsg.info.ahandle); + 
rpcSendResponse(&rpcMsg); } pStub = taosHashIterate(pObj->pRespHash, pStub); } int32_t arraySize = taosArrayGetSize(delIndexArray); - sDebug("vgId:%d, resp mgr end clean by ttl, sum:%d, cnt:%d, array-size:%d", pSyncNode->vgId, sum, cnt, arraySize); + sDebug("vgId:%d, resp manager end clean by ttl, sum:%d, cnt:%d, array-size:%d", pSyncNode->vgId, sum, cnt, arraySize); for (int32_t i = 0; i < arraySize; ++i) { uint64_t *pSeqNum = taosArrayGet(delIndexArray, i); taosHashRemove(pObj->pRespHash, pSeqNum, sizeof(uint64_t)); - sDebug("vgId:%d, resp mgr clean by ttl, seq:%" PRId64 "", pSyncNode->vgId, *pSeqNum); + sDebug("vgId:%d, resp manager clean by ttl, seq:%" PRId64, pSyncNode->vgId, *pSeqNum); } taosArrayDestroy(delIndexArray); } void syncRespCleanRsp(SSyncRespMgr *pObj) { + SSyncNode *pNode = pObj->data; + sTrace("vgId:%d, clean all rsp", pNode->vgId); + taosThreadMutexLock(&pObj->mutex); syncRespCleanByTTL(pObj, -1, true); taosThreadMutexUnlock(&pObj->mutex); } void syncRespClean(SSyncRespMgr *pObj) { + SSyncNode *pNode = pObj->data; + sTrace("vgId:%d, clean rsp by ttl", pNode->vgId); + taosThreadMutexLock(&pObj->mutex); syncRespCleanByTTL(pObj, pObj->ttl, false); taosThreadMutexUnlock(&pObj->mutex);