Merge branch '3.0' of https://github.com/taosdata/TDengine into enh/tsdb_optimize

9d031f4d · Hongze Cheng · 5bcc9c54 · 884e7d0d · 9d031f4d · 9d031f4d
7 changed files
--- a/docs/en/07-develop/09-udf.md
+++ b/docs/en/07-develop/09-udf.md
@@ -10,7 +10,7 @@ User-defined functions can be scalar functions or aggregate functions. Scalar fu
 TDengine supports user-defined functions written in C or Python. This document describes the usage of user-defined functions.
-## Implement a UDF in C 
+## Implement a UDF in C
 When you create a user-defined function, you must implement standard interface functions:
 - For scalar functions, implement the `scalarfn` interface function.
@@ -111,13 +111,13 @@ Interface functions return a value that indicates whether the operation was succ
 For information about the parameters for interface functions, see Data Model
 #### Scalar Interface
- `int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn)` 
+ `int32_t scalarfn(SUdfDataBlock* inputDataBlock, SUdfColumn *resultColumn)`
 Replace `scalarfn` with the name of your function. This function performs scalar calculations on data blocks. You can configure a value through the parameters in the `resultColumn` structure.
 The parameters in the function are defined as follows:
  - inputDataBlock: The data block to input.
-  - resultColumn: The column to output. The column to output. 
+  - resultColumn: The column to output.
 #### Aggregate Interface
@@ -197,7 +197,7 @@ The data structure is described as follows:
 - The SUdfDataBlock block includes the number of rows (numOfRows) and the number of columns (numCols). udfCols[i] (0 <= i <= numCols-1) indicates that each column is of type SUdfColumn.
 - SUdfColumn includes the definition of the data type of the column (colMeta) and the data in the column (colData).
 - The member definitions of SUdfColumnMeta are the same as the data type definitions in `taos.h`.
- The data in SUdfColumnData can become longer. varLenCol indicates variable-length data, and fixLenCol indicates fixed-length data. 
+- The data in SUdfColumnData can be of variable length. varLenCol indicates variable-length data, and fixLenCol indicates fixed-length data.
 - SUdfInterBuf defines the intermediate structure `buffer` and the number of results in the buffer `numOfResult`.
 Additional functions are defined in `taosudf.h` to make it easier to work with these structures.
@@ -285,9 +285,9 @@ def init():
 def destroy():
    # destroy
 def process(input: datablock) -> tuple[output_type]:
-    # process input datablock, 
+    # process input datablock,
    # datablock.data(row, col) is to access the python object in location(row,col)
-    # return tuple object consisted of object of type outputtype   
+    # return a tuple object consisting of objects of type outputtype
 ```
 ### Implement an Aggregate UDF in Python
@@ -303,12 +303,12 @@ def start() -> bytes:
    #return serialize(init_state)
 def reduce(inputs: datablock, buf: bytes) -> bytes
    # deserialize buf to state
-    # reduce the inputs and state into new_state. 
+    # reduce the inputs and state into new_state.
-    # use inputs.data(i,j) to access python ojbect of location(i,j)
+    # use inputs.data(i,j) to access python object of location(i,j)
    # serialize new_state into new_state_bytes
-    return new_state_bytes   
+    return new_state_bytes
 def finish(buf: bytes) -> output_type:
-    #return obj of type outputtype   
+    #return obj of type outputtype
 ```
 ### Python UDF Interface Definition
@@ -328,7 +328,7 @@ def finish(buf: bytes) -> output_type:
 ```
 - first `start()` is called to return the initial result in type `bytes`
- then the input data are divided into multiple data blocks and for each block `input`, `reduce` is called with the data block `input` and the current result `buf` bytes and generates a new intermediate result buffer. 
+- then the input data are divided into multiple data blocks and for each block `input`, `reduce` is called with the data block `input` and the current result `buf` bytes and generates a new intermediate result buffer.
 - finally, the `finish` function is called on the intermediate result `buf` and outputs 0 or 1 data of type `output_type`
@@ -337,7 +337,7 @@ def finish(buf: bytes) -> output_type:
 def init()
 def destroy()
 ```
-Implement `init` for initialization and `destroy` for termination. 
+Implement `init` for initialization and `destroy` for termination.
 ### Data Mapping between TDengine SQL and Python UDF
@@ -360,7 +360,7 @@ sudo pip install taospyudf
 ldconfig
 ```
 2. If PYTHONPATH is needed to find Python packages when the Python UDF executes, include the PYTHONPATH contents into the udfdLdLibPath variable of the taos.cfg configuration file
 ### Python UDF Sample Code
 #### Scalar Function [pybitand](https://github.com/taosdata/TDengine/blob/3.0/tests/script/sh/pybitand.py)

--- a/docs/en/14-reference/12-config/index.md
+++ b/docs/en/14-reference/12-config/index.md
@@ -111,7 +111,7 @@ The parameters described in this document by the effect that they have on the sy
 | Attribute     | Description                                    |
 | ------------- | ---------------------------------------------- |
 | Applicable    | Client/Server                                  |
-| Meaning       | The maximum waiting time to get avaliable conn |
+| Meaning       | The maximum waiting time to get available conn |
 | Value Range   | 10-50000000(ms)                                |
 | Default Value | 500000                                         |

--- a/docs/zh/07-develop/09-udf.md
+++ b/docs/zh/07-develop/09-udf.md
@@ -303,7 +303,7 @@ def start() -> bytes:
 def reduce(inputs: datablock, buf: bytes) -> bytes
    # deserialize buf to state
    # reduce the inputs and state into new_state. 
-    # use inputs.data(i,j) to access python ojbect of location(i,j)
+    # use inputs.data(i,j) to access python object of location(i,j)
    # serialize new_state into new_state_bytes
    return new_state_bytes   
 def finish(buf: bytes) -> output_type:

--- a/include/libs/wal/wal.h
+++ b/include/libs/wal/wal.h
@@ -149,6 +149,7 @@ struct SWalReader {
  TdFilePtr      pIdxFile;
  int64_t        curFileFirstVer;
  int64_t        curVersion;
+  int64_t        skipToVersion; // skip data and jump to destination version, usually used by stream resume ignoring untreated data
  int64_t        capacity;
  TdThreadMutex  mutex;
  SWalFilterCond cond;
@@ -200,6 +201,8 @@ int32_t     walReaderSeekVer(SWalReader *pRead, int64_t ver);
 int32_t     walNextValidMsg(SWalReader *pRead);
 int64_t     walReaderGetCurrentVer(const SWalReader *pReader);
 int64_t     walReaderGetValidFirstVer(const SWalReader *pReader);
+int64_t     walReaderGetSkipToVersion(SWalReader *pReader);
+void        walReaderSetSkipToVersion(SWalReader *pReader, int64_t ver);
 void        walReaderValidVersionRange(SWalReader *pReader, int64_t *sver, int64_t *ever);
 void        walReaderVerifyOffset(SWalReader *pWalReader, STqOffsetVal* pOffset);

--- a/source/dnode/vnode/src/tq/tq.c
+++ b/source/dnode/vnode/src/tq/tq.c
@@ -1304,19 +1304,22 @@ int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msg
 int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
  SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg;
+  int32_t      vgId = pTq->pStreamMeta->vgId;
  SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
  if (pTask) {
    atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);
    // no lock needs to secure the access of the version
-    if (pReq->igUntreated && pTask->taskLevel == TASK_LEVEL__SOURCE) {  // discard all the data  when the stream task is suspended.
+    if (pReq->igUntreated && pTask->taskLevel == TASK_LEVEL__SOURCE) {
-      pTask->chkInfo.currentVer = sversion;
+      // discard all the data  when the stream task is suspended.
-      walReaderSeekVer(pTask->exec.pWalReader, sversion);
+      walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
-      tqDebug("vgId:%d s-task:%s resume to normal from the latest version:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d", pTq->pStreamMeta->vgId,
+      tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
-              pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
+              ", schedStatus:%d",
+              vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
    } else {  // from the previous paused version and go on
-      tqDebug("vgId:%d s-task:%s resume to normal from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d", pTq->pStreamMeta->vgId,
+      tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
-              pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
+              vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
    }
    if (pTask->taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) {
@@ -1325,6 +1328,8 @@ int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms
      streamSchedExec(pTask);
    }
    streamMetaReleaseTask(pTq->pStreamMeta, pTask);
+  } else {
+    tqError("vgId:%d failed to find the s-task:0x%x for resume stream task", vgId, pReq->taskId);
  }
  return 0;
@@ -1433,7 +1438,7 @@ int32_t tqStartStreamTasks(STQ* pTq) {
  int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
  if (numOfTasks == 0) {
    tqInfo("vgId:%d no stream tasks exist", vgId);
-    taosWUnLockLatch(&pTq->pStreamMeta->lock);
+    taosWUnLockLatch(&pMeta->lock);
    return 0;
  }
@@ -1441,7 +1446,7 @@ int32_t tqStartStreamTasks(STQ* pTq) {
  if (pMeta->walScanCounter > 1) {
    tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
-    taosWUnLockLatch(&pTq->pStreamMeta->lock);
+    taosWUnLockLatch(&pMeta->lock);
    return 0;
  }
@@ -1449,7 +1454,7 @@ int32_t tqStartStreamTasks(STQ* pTq) {
  if (pRunReq == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
-    taosWUnLockLatch(&pTq->pStreamMeta->lock);
+    taosWUnLockLatch(&pMeta->lock);
    return -1;
  }
@@ -1460,7 +1465,7 @@ int32_t tqStartStreamTasks(STQ* pTq) {
  SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
  tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
-  taosWUnLockLatch(&pTq->pStreamMeta->lock);
+  taosWUnLockLatch(&pMeta->lock);
  return 0;
 }
--- a/source/dnode/vnode/src/tq/tqRestore.c
+++ b/source/dnode/vnode/src/tq/tqRestore.c
@@ -87,6 +87,16 @@ static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
    }
  }
+  int64_t skipToVer = walReaderGetSkipToVersion(pTask->exec.pWalReader);
+  if (skipToVer != 0 && skipToVer > pTask->chkInfo.currentVer) {
+    int32_t code = walReaderSeekVer(pTask->exec.pWalReader, skipToVer);
+    if (code != TSDB_CODE_SUCCESS) {  // no data in wal, quit
+      return code;
+    }
+    tqDebug("vgId:%d s-task:%s wal reader jump to ver:%" PRId64, vgId, pTask->id.idStr, skipToVer);
+  }
  return TSDB_CODE_SUCCESS;
 }

--- a/source/libs/wal/src/walRead.c
+++ b/source/libs/wal/src/walRead.c
@@ -108,6 +108,14 @@ int32_t walNextValidMsg(SWalReader *pReader) {
 int64_t walReaderGetCurrentVer(const SWalReader *pReader) { return pReader->curVersion; }  // returns the reader's curVersion field as-is
 int64_t walReaderGetValidFirstVer(const SWalReader *pReader) { return walGetFirstVer(pReader->pWal); }  // delegates to walGetFirstVer() on the reader's underlying pWal
+void    walReaderSetSkipToVersion(SWalReader *pReader, int64_t ver) { atomic_store_64(&pReader->skipToVersion, ver); }  // atomically stores ver into pReader->skipToVersion
+// Returns the stored skip-to version and resets it to 0 (read-then-reset); this function is NOT multi-thread safe, and per the original author does not need to be.
+int64_t walReaderGetSkipToVersion(SWalReader *pReader) {
+  int64_t newVersion = pReader->skipToVersion;  // plain (non-atomic) read; NOTE(review): the setter uses atomic_store_64 - confirm no set can land between this read and the reset below
+  pReader->skipToVersion = 0;  // clear the stored value so the next call returns 0 unless a new version is set
+  return newVersion;
+}
 void walReaderValidVersionRange(SWalReader *pReader, int64_t *sver, int64_t *ever) {
  *sver = walGetFirstVer(pReader->pWal);