Merge pull request #20414 from taosdata/feat/TD-23058

feat: correct slimit&limit clause

Merge pull request #20414 from taosdata/feat/TD-23058
feat: correct slimit&limit clause
041f03e9 · Shengliang Guan · GitHub · 5a957eeb · 91df1b32 · 041f03e9
14 changed files
--- a/docs/zh/12-taos-sql/06-select.md
+++ b/docs/zh/12-taos-sql/06-select.md
@@ -248,11 +248,11 @@ NULLS 语法用来指定 NULL 值在排序中输出的位置。NULLS LAST 是升

 LIMIT 控制输出条数，OFFSET 指定从第几条之后开始输出。LIMIT/OFFSET 对结果集的执行顺序在 ORDER BY 之后。LIMIT 5 OFFSET 2 可以简写为 LIMIT 2, 5，都输出第 3 行到第 7 行数据。

-在有 PARTITION BY 子句时，LIMIT 控制的是每个切分的分片中的输出，而不是总的结果集输出。
+在有 PARTITION BY/GROUP BY 子句时，LIMIT 控制的是每个切分的分片中的输出，而不是总的结果集输出。

 ## SLIMIT

-SLIMIT 和 PARTITION BY 子句一起使用，用来控制输出的分片的数量。SLIMIT 5 SOFFSET 2 可以简写为 SLIMIT 2, 5，都表示输出第 3 个到第 7 个分片。
+SLIMIT 和 PARTITION BY/GROUP BY 子句一起使用，用来控制输出的分片的数量。SLIMIT 5 SOFFSET 2 可以简写为 SLIMIT 2, 5，都表示输出第 3 个到第 7 个分片。

 需要注意，如果有 ORDER BY 子句，则输出只有一个分片。


--- a/include/util/taoserror.h
+++ b/include/util/taoserror.h
@@ -642,7 +642,7 @@ int32_t* taosGetErrno();
 #define TSDB_CODE_PAR_INCORRECT_NUM_OF_COL      TAOS_DEF_ERROR_CODE(0, 0x2634)
 #define TSDB_CODE_PAR_INCORRECT_TIMESTAMP_VAL   TAOS_DEF_ERROR_CODE(0, 0x2635)
 #define TSDB_CODE_PAR_OFFSET_LESS_ZERO          TAOS_DEF_ERROR_CODE(0, 0x2637)
-#define TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_BY  TAOS_DEF_ERROR_CODE(0, 0x2638)
+#define TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_GROUP_BY  TAOS_DEF_ERROR_CODE(0, 0x2638)
 #define TSDB_CODE_PAR_INVALID_TOPIC_QUERY       TAOS_DEF_ERROR_CODE(0, 0x2639)
 #define TSDB_CODE_PAR_INVALID_DROP_STABLE       TAOS_DEF_ERROR_CODE(0, 0x263A)
 #define TSDB_CODE_PAR_INVALID_FILL_TIME_RANGE   TAOS_DEF_ERROR_CODE(0, 0x263B)

--- a/source/libs/executor/inc/executorimpl.h
+++ b/source/libs/executor/inc/executorimpl.h
@@ -750,7 +750,7 @@ void clearResultRowInitFlag(SqlFunctionCtx* pCtx, int32_t numOfOutput);

 SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pResultRowInfo, char* pData,
                                   int16_t bytes, bool masterscan, uint64_t groupId, SExecTaskInfo* pTaskInfo,
-                                   bool isIntervalQuery, SAggSupporter* pSup);
+                                   bool isIntervalQuery, SAggSupporter* pSup, bool keepGroup);
 // operator creater functions
 // clang-format off
 SOperatorInfo* createExchangeOperatorInfo(void* pTransporter, SExchangePhysiNode* pExNode, SExecTaskInfo* pTaskInfo);

--- a/source/libs/executor/src/executorimpl.c
+++ b/source/libs/executor/src/executorimpl.c
@@ -195,9 +195,12 @@ SResultRow* getNewResultRow(SDiskbasedBuf* pResultBuf, int32_t* currentPageId, i
 */
 SResultRow* doSetResultOutBufByKey(SDiskbasedBuf* pResultBuf, SResultRowInfo* pResultRowInfo, char* pData,
                                   int16_t bytes, bool masterscan, uint64_t groupId, SExecTaskInfo* pTaskInfo,
-                                   bool isIntervalQuery, SAggSupporter* pSup) {
+                                   bool isIntervalQuery, SAggSupporter* pSup, bool keepGroup) {
  SET_RES_WINDOW_KEY(pSup->keyBuf, pData, bytes, groupId);
-
+  if (!keepGroup) {
+    *(uint64_t*)pSup->keyBuf = calcGroupId(pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));
+  }
+  
  SResultRowPosition* p1 =
      (SResultRowPosition*)tSimpleHashGet(pSup->pResultRowHashTable, pSup->keyBuf, GET_RES_WINDOW_KEY_LEN(bytes));

@@ -1034,7 +1037,7 @@ void doSetTableGroupOutputBuf(SOperatorInfo* pOperator, int32_t numOfOutput, uin
  int32_t*        rowEntryInfoOffset = pOperator->exprSupp.rowEntryInfoOffset;

  SResultRow* pResultRow = doSetResultOutBufByKey(pAggInfo->aggSup.pResultBuf, pResultRowInfo, (char*)&groupId,
-                                                  sizeof(groupId), true, groupId, pTaskInfo, false, &pAggInfo->aggSup);
+                                                  sizeof(groupId), true, groupId, pTaskInfo, false, &pAggInfo->aggSup, true);
  /*
   * not assign result buffer yet, add new result buffer
   * all group belong to one result set, and each group result has different group id so set the id to be one

--- a/source/libs/executor/src/groupoperator.c
+++ b/source/libs/executor/src/groupoperator.c
@@ -277,6 +277,7 @@ static void doHashGroupbyAgg(SOperatorInfo* pOperator, SSDataBlock* pBlock) {
  terrno = TSDB_CODE_SUCCESS;

  int32_t num = 0;
+  uint64_t groupId = 0;
  for (int32_t j = 0; j < pBlock->info.rows; ++j) {
    // Compare with the previous row of this column, and do not set the output buffer again if they are identical.
    if (!pInfo->isInit) {
@@ -473,6 +474,8 @@ SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode*
  initResultRowInfo(&pInfo->binfo.resultRowInfo);
  setOperatorInfo(pOperator, "GroupbyAggOperator", 0, true, OP_NOT_OPENED, pInfo, pTaskInfo);

+  pInfo->binfo.mergeResultBlock = pAggNode->mergeDataBlock;
+
  pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, hashGroupbyAggregate, NULL, destroyGroupOperatorInfo,
                                         optrDefaultBufFn, NULL);
  code = appendDownstream(pOperator, &downstream, 1);
@@ -917,7 +920,7 @@ int32_t setGroupResultOutputBuf(SOperatorInfo* pOperator, SOptrBasicInfo* binfo,
  SqlFunctionCtx* pCtx = pOperator->exprSupp.pCtx;

  SResultRow* pResultRow =
-      doSetResultOutBufByKey(pBuf, pResultRowInfo, (char*)pData, bytes, true, groupId, pTaskInfo, false, pAggSup);
+      doSetResultOutBufByKey(pBuf, pResultRowInfo, (char*)pData, bytes, true, groupId, pTaskInfo, false, pAggSup, false);
  assert(pResultRow != NULL);

  setResultRowInitCtx(pResultRow, pCtx, numOfCols, pOperator->exprSupp.rowEntryInfoOffset);

--- a/source/libs/executor/src/projectoperator.c
+++ b/source/libs/executor/src/projectoperator.c
@@ -580,7 +580,7 @@ void setFunctionResultOutput(SOperatorInfo* pOperator, SOptrBasicInfo* pInfo, SA
  int64_t     tid = 0;
  int64_t     groupId = 0;
  SResultRow* pRow = doSetResultOutBufByKey(pSup->pResultBuf, pResultRowInfo, (char*)&tid, sizeof(tid), true, groupId,
-                                            pTaskInfo, false, pSup);
+                                            pTaskInfo, false, pSup, true);

  for (int32_t i = 0; i < numOfExprs; ++i) {
    struct SResultRowEntryInfo* pEntry = getResultEntryInfo(pRow, i, rowEntryInfoOffset);

--- a/source/libs/executor/src/timewindowoperator.c
+++ b/source/libs/executor/src/timewindowoperator.c
@@ -78,7 +78,7 @@ static int32_t setTimeWindowOutputBuf(SResultRowInfo* pResultRowInfo, STimeWindo
                                      int32_t numOfOutput, int32_t* rowEntryInfoOffset, SAggSupporter* pAggSup,
                                      SExecTaskInfo* pTaskInfo) {
  SResultRow* pResultRow = doSetResultOutBufByKey(pAggSup->pResultBuf, pResultRowInfo, (char*)&win->skey, TSDB_KEYSIZE,
-                                                  masterscan, tableGroupId, pTaskInfo, true, pAggSup);
+                                                  masterscan, tableGroupId, pTaskInfo, true, pAggSup, true);

  if (pResultRow == NULL) {
    *pResult = NULL;

--- a/source/libs/parser/src/parTranslater.c
+++ b/source/libs/parser/src/parTranslater.c
@@ -3372,8 +3372,8 @@ static int32_t checkLimit(STranslateContext* pCxt, SSelectStmt* pSelect) {
    return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_OFFSET_LESS_ZERO);
  }

-  if (NULL != pSelect->pSlimit && NULL == pSelect->pPartitionByList) {
-    return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_BY);
+  if (NULL != pSelect->pSlimit && (NULL == pSelect->pPartitionByList && NULL == pSelect->pGroupByList)) {
+    return generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_GROUP_BY);
  }

  return TSDB_CODE_SUCCESS;

--- a/source/libs/parser/src/parUtil.c
+++ b/source/libs/parser/src/parUtil.c
@@ -103,8 +103,8 @@ static char* getSyntaxErrFormat(int32_t errCode) {
      return "Incorrect TIMESTAMP value: %s";
    case TSDB_CODE_PAR_OFFSET_LESS_ZERO:
      return "soffset/offset can not be less than 0";
-    case TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_BY:
-      return "slimit/soffset only available for PARTITION BY query";
+    case TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_GROUP_BY:
+      return "slimit/soffset only available for PARTITION/GROUP BY query";
    case TSDB_CODE_PAR_INVALID_TOPIC_QUERY:
      return "Invalid topic query";
    case TSDB_CODE_PAR_INVALID_DROP_STABLE:

--- a/source/util/src/terror.c
+++ b/source/util/src/terror.c
@@ -515,7 +515,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_PAR_ONLY_ONE_JSON_TAG,         "Only one tag if ther
 TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INCORRECT_NUM_OF_COL,      "Query block has incorrect number of result columns")
 TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INCORRECT_TIMESTAMP_VAL,   "Incorrect TIMESTAMP value")
 TAOS_DEFINE_ERROR(TSDB_CODE_PAR_OFFSET_LESS_ZERO,          "soffset/offset can not be less than 0")
-TAOS_DEFINE_ERROR(TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_BY,  "slimit/soffset only available for PARTITION BY query")
+TAOS_DEFINE_ERROR(TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_GROUP_BY,  "slimit/soffset only available for PARTITION/GROUP BY query")
 TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_TOPIC_QUERY,        "Invalid topic query")
 TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_DROP_STABLE,        "Cannot drop super table in batch")
 TAOS_DEFINE_ERROR(TSDB_CODE_PAR_INVALID_FILL_TIME_RANGE,    "Start(end) time of query range required or time range too large")

--- a/tests/parallel_test/cases.task
+++ b/tests/parallel_test/cases.task
@@ -168,6 +168,7 @@
 ,,y,script,./test.sh -f tsim/parser/union.sim
 ,,y,script,./test.sh -f tsim/parser/union_sysinfo.sim
 ,,y,script,./test.sh -f tsim/parser/where.sim
+,,y,script,./test.sh -f tsim/parser/slimit_limit.sim
 ,,y,script,./test.sh -f tsim/query/tagLikeFilter.sim
 ,,y,script,./test.sh -f tsim/query/charScalarFunction.sim
 ,,y,script,./test.sh -f tsim/query/explain.sim

--- a/tests/script/tsim/parser/groupby.sim
+++ b/tests/script/tsim/parser/groupby.sim
@@ -415,12 +415,12 @@ if $data03 != 0 then
  return -1
 endi

-sql select count(*),first(ts),last(ts),min(c3) from group_tb1 group by c4 limit 1;
+sql select count(*),first(ts),last(ts),min(c3) from group_tb1 group by c4 slimit 1;
 if $rows != 1 then
  return -1
 endi

-sql select count(*),first(ts),last(ts),min(c3) from group_tb1 group by c4 limit 20 offset 9990;
+sql select count(*),first(ts),last(ts),min(c3) from group_tb1 group by c4 slimit 20 soffset 9990;
 if $rows != 10 then
  return -1
 endi

--- a/tests/script/tsim/parser/slimit_limit.sim
+++ b/tests/script/tsim/parser/slimit_limit.sim
+system sh/stop_dnodes.sh
+system sh/deploy.sh -n dnode1 -i 1
+system sh/exec.sh -n dnode1 -s start
+sql connect
+
+sql drop database if exists db1;
+sql create database db1 vgroups 1;
+sql use db1;
+sql create stable sta (ts timestamp, f1 int, f2 binary(200)) tags(t1 int, t2 int, t3 int);
+sql create table tba1 using sta tags(1, 1, 1);
+sql create table tba2 using sta tags(2, 2, 2);
+sql create table tba3 using sta tags(3, 3, 3);
+sql create table tba4 using sta tags(4, 4, 4);
+sql create table tba5 using sta tags(5, 5, 5);
+sql create table tba6 using sta tags(6, 6, 6);
+sql create table tba7 using sta tags(7, 7, 7);
+sql create table tba8 using sta tags(8, 8, 8);
+sql create index index1 on sta (t2);
+sql insert into tba1 values ('2022-04-26 15:15:01', 1, "a");
+sql insert into tba1 values ('2022-04-26 15:15:02', 11, "a");
+sql insert into tba2 values ('2022-04-26 15:15:01', 2, "a");
+sql insert into tba2 values ('2022-04-26 15:15:02', 22, "a");
+sql insert into tba3 values ('2022-04-26 15:15:01', 3, "a");
+sql insert into tba4 values ('2022-04-26 15:15:01', 4, "a");
+sql insert into tba5 values ('2022-04-26 15:15:01', 5, "a");
+sql insert into tba6 values ('2022-04-26 15:15:01', 6, "a");
+sql insert into tba7 values ('2022-04-26 15:15:01', 7, "a");
+sql insert into tba8 values ('2022-04-26 15:15:01', 8, "a");
+
+sql select t1,count(*) from sta group by t1 limit 1;
+if $rows != 8 then
+  return -1
+endi
+sql select t1,count(*) from sta group by t1 slimit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select f1,count(*) from sta group by f1 limit 1;
+if $rows != 10 then
+  return -1
+endi
+sql select f1,count(*) from sta group by f1 slimit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by t1, f1 limit 1;
+if $rows != 10 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by t1, f1 slimit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by f1, t1 limit 1;
+if $rows != 10 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by f1, t1 slimit 1;
+if $rows != 1 then
+  return -1
+endi
+
+sql select t1,count(*) from sta group by t1 order by t1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select t1,count(*) from sta group by t1 order by t1 slimit 1;
+if $rows != 8 then
+  return -1
+endi
+sql select f1,count(*) from sta group by f1 order by f1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select f1,count(*) from sta group by f1 order by f1 slimit 1;
+if $rows != 10 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by t1, f1 order by t1,f1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by t1, f1 order by t1,f1 slimit 1;
+if $rows != 10 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by f1, t1 order by f1,t1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by f1, t1 order by f1,t1 slimit 1;
+if $rows != 10 then
+  return -1
+endi
+
+sql select t1,count(*) from sta group by t1 slimit 1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select f1,count(*) from sta group by f1 slimit 1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by t1, f1 slimit 1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+sql select t1,f1,count(*) from sta group by f1, t1 slimit 1 limit 1;
+if $rows != 1 then
+  return -1
+endi
+
+system sh/exec.sh -n dnode1 -s stop -x SIGINT
--- a/tests/system-test/2-query/sample.py
+++ b/tests/system-test/2-query/sample.py
@@ -171,6 +171,7 @@ class TDTestCase:
            if any(parm in condition.lower().strip() for parm in condition_exception):

                print(f"case in {line}: ", end='')
+                print(f"condition : {condition}: ", end='')
                return tdSql.error(self.sample_query_form(
                    sel=sel, func=func, col=col, m_comm=m_comm, k=k, r_comm=r_comm, alias=alias, fr=fr,
                    table_expr=table_expr, condition=condition
@@ -391,16 +392,6 @@ class TDTestCase:
        self.checksample(**case25)
        case26 = {"k": 1000}
        self.checksample(**case26)
-        case27 = {
-            "table_expr": f"{DBNAME}.stb1",
-            "condition": "group by tbname slimit 1 "
-        }
-        self.checksample(**case27)         # with slimit
-        case28 = {
-            "table_expr": f"{DBNAME}.stb1",
-            "condition": "group by tbname slimit 1 soffset 1"
-        }
-        self.checksample(**case28)         # with soffset

        pass