Merge pull request #10202 from taosdata/szhou/feature/td-11218

TD-11218: string functions

Merge pull request #10202 from taosdata/szhou/feature/td-11218
TD-11218: string functions
105f17e2 · shenglian-zhou · GitHub · ac6c719a · b76db21a · 105f17e2
隐藏空白更改
内联并排

Showing 3 changed files with 260 additions and 4 deletions

src/common/inc/texpr.h +7 -2

src/common/src/texpr.c +252 -1

src/query/src/qAggMain.c +1 -1

未找到文件。
--- a/src/common/inc/texpr.h
+++ b/src/common/inc/texpr.h
@@ -65,7 +65,12 @@ struct SSchema;
 #define TSDB_FUNC_SCALAR_CONCAT_WS    (TSDB_FUNC_FLAG_SCALAR | 0x000F)
 #define TSDB_FUNC_SCALAR_CHAR_LENGTH  (TSDB_FUNC_FLAG_SCALAR | 0x0010)
 #define TSDB_FUNC_SCALAR_CAST         (TSDB_FUNC_FLAG_SCALAR | 0x0011)
-#define TSDB_FUNC_SCALAR_MAX_NUM      18
+#define TSDB_FUNC_SCALAR_LOWER        (TSDB_FUNC_FLAG_SCALAR | 0x0012)
+#define TSDB_FUNC_SCALAR_UPPER        (TSDB_FUNC_FLAG_SCALAR | 0x0013)
+#define TSDB_FUNC_SCALAR_LTRIM        (TSDB_FUNC_FLAG_SCALAR | 0x0014)
+#define TSDB_FUNC_SCALAR_RTRIM        (TSDB_FUNC_FLAG_SCALAR | 0x0015)
+#define TSDB_FUNC_SCALAR_SUBSTR       (TSDB_FUNC_FLAG_SCALAR | 0x0016)
+#define TSDB_FUNC_SCALAR_NUM_FUNCTIONS 23

 #define TSDB_FUNC_SCALAR_NAME_MAX_LEN 16

@@ -87,7 +92,7 @@ typedef struct tScalarFunctionInfo{
 } tScalarFunctionInfo;

 /* global scalar sql functions array */
-extern struct tScalarFunctionInfo aScalarFunctions[TSDB_FUNC_SCALAR_MAX_NUM];
+extern struct tScalarFunctionInfo aScalarFunctions[TSDB_FUNC_SCALAR_NUM_FUNCTIONS];


 typedef bool (*__result_filter_fn_t)(const void *, void *);

--- a/src/common/src/texpr.c
+++ b/src/common/src/texpr.c
@@ -33,6 +33,8 @@ static int32_t exprValidateMathNode(tExprNode *pExpr);
 static int32_t exprValidateStringConcatNode(tExprNode *pExpr);
 static int32_t exprValidateStringConcatWsNode(tExprNode *pExpr);
 static int32_t exprValidateStringLengthNode(tExprNode *pExpr);
+static int32_t exprValidateStringLowerUpperTrimNode(char* msgBuf, tExprNode *pExpr);
+static int32_t exprValidateStringSubstrNode(char* msgBuf, tExprNode *pExpr);
 static int32_t exprValidateCastNode(char* msgbuf, tExprNode *pExpr);

 static int32_t exprInvalidOperationMsg(char *msgbuf, const char *msg) {
@@ -77,6 +79,15 @@ int32_t exprTreeValidateFunctionNode(char* msgbuf, tExprNode *pExpr) {
    case TSDB_FUNC_SCALAR_CONCAT_WS: {
      return exprValidateStringConcatWsNode(pExpr);
    }
+    case TSDB_FUNC_SCALAR_LOWER:
+    case TSDB_FUNC_SCALAR_UPPER:
+    case TSDB_FUNC_SCALAR_LTRIM:
+    case TSDB_FUNC_SCALAR_RTRIM: {
+      return exprValidateStringLowerUpperTrimNode(msgbuf, pExpr);
+    }
+    case TSDB_FUNC_SCALAR_SUBSTR: {
+      return exprValidateStringSubstrNode(msgbuf, pExpr);
+    }

    default:
      break;
@@ -1042,6 +1053,58 @@ int32_t exprValidateStringLengthNode(tExprNode *pExpr) {
  return TSDB_CODE_SUCCESS;
 }

+/*
+ * Validate a LOWER / UPPER / LTRIM / RTRIM function node.
+ *
+ * The node must have exactly one child, and that child must be of a
+ * variable-length type (IS_VAR_DATA_TYPE). On success the node takes over
+ * the child's result type and result byte width.
+ *
+ * msgBuf is not used by this function.
+ *
+ * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_TSC_INVALID_OPERATION when the
+ * argument count or the argument type is not acceptable.
+ */
+int32_t exprValidateStringLowerUpperTrimNode(char* msgBuf, tExprNode *pExpr) {
+  if (pExpr->_func.numChildren != 1) {
+    return TSDB_CODE_TSC_INVALID_OPERATION;
+  }
+
+  tExprNode* arg = pExpr->_func.pChildren[0];
+  const bool argIsLiteral = (arg->nodeType == TSQL_NODE_VALUE);
+
+  /* A literal carries its type and length in pVal; mirror them onto the node. */
+  if (argIsLiteral) {
+    arg->resultType = (int16_t)arg->pVal->nType;
+    arg->resultBytes = (int16_t)(arg->pVal->nLen + VARSTR_HEADER_SIZE);
+  }
+
+  if (!IS_VAR_DATA_TYPE(arg->resultType)) {
+    return TSDB_CODE_TSC_INVALID_OPERATION;
+  }
+
+  /* The result has the same type and width as the argument. */
+  pExpr->resultBytes = arg->resultBytes;
+  pExpr->resultType = arg->resultType;
+  return TSDB_CODE_SUCCESS;
+}
+
+/*
+ * Validate a SUBSTR function node: SUBSTR(str, pos) or SUBSTR(str, pos, len).
+ *
+ * The first child must be of a variable-length type (IS_VAR_DATA_TYPE); the
+ * position and the optional length must be numeric (IS_NUMERIC_TYPE). On
+ * success the node takes over the string child's result type and byte width.
+ *
+ * msgBuf is not used by this function.
+ *
+ * Returns TSDB_CODE_SUCCESS, or TSDB_CODE_TSC_INVALID_OPERATION when the
+ * argument count or any argument type is not acceptable.
+ *
+ * NOTE(review): only the string child has resultType/resultBytes refreshed
+ * from pVal when it is a literal; pos and len are checked through resultType
+ * as-is. Confirm that literal pos/len nodes have resultType set elsewhere.
+ */
+int32_t exprValidateStringSubstrNode(char* msgBuf, tExprNode *pExpr) {
+  switch (pExpr->_func.numChildren) {
+    case 2:
+    case 3:
+      break;
+    default:
+      return TSDB_CODE_TSC_INVALID_OPERATION;
+  }
+
+  tExprNode* strArg = pExpr->_func.pChildren[0];
+
+  /* A literal carries its type and length in pVal; mirror them onto the node. */
+  if (strArg->nodeType == TSQL_NODE_VALUE) {
+    strArg->resultType = (int16_t)strArg->pVal->nType;
+    strArg->resultBytes = (int16_t)(strArg->pVal->nLen + VARSTR_HEADER_SIZE);
+  }
+
+  if (!IS_VAR_DATA_TYPE(strArg->resultType)) {
+    return TSDB_CODE_TSC_INVALID_OPERATION;
+  }
+
+  tExprNode* posArg = pExpr->_func.pChildren[1];
+  tExprNode* lenArg = NULL;
+  if (pExpr->_func.numChildren == 3) {
+    lenArg = pExpr->_func.pChildren[2];
+  }
+
+  if (!IS_NUMERIC_TYPE(posArg->resultType)) {
+    return TSDB_CODE_TSC_INVALID_OPERATION;
+  }
+  if (lenArg != NULL && !IS_NUMERIC_TYPE(lenArg->resultType)) {
+    return TSDB_CODE_TSC_INVALID_OPERATION;
+  }
+
+  /* The result has the same type and width as the string argument. */
+  pExpr->resultBytes = strArg->resultBytes;
+  pExpr->resultType = strArg->resultType;
+  return TSDB_CODE_SUCCESS;
+}
+
 int32_t exprValidateCastNode(char* msgbuf, tExprNode *pExpr) {
  const char* msg1 = "invalid param num for cast function";
  const char* msg2 = "the second param should be a valid type name for cast function";
@@ -1409,6 +1472,168 @@ void vectorCharLength(int16_t functionId, tExprOperandInfo *pInputs, int32_t num
  }
 }

+/*
+ * Scalar kernel shared by LOWER, UPPER, LTRIM and RTRIM.
+ *
+ * Produces pOutput->numOfRows rows. The single input either has one row
+ * (reused for every output row) or as many rows as the output. A NULL input
+ * row yields a NULL output row.
+ *
+ * LOWER/UPPER convert BINARY payloads byte by byte and NCHAR payloads in
+ * TSDB_NCHAR_SIZE units; for any other input type only the length header is
+ * written. LTRIM/RTRIM treat every non-BINARY input as TSDB_NCHAR_SIZE-wide
+ * characters.
+ *
+ * functionId  TSDB_FUNC_SCALAR_LOWER / UPPER / LTRIM / RTRIM; any other id
+ *             leaves non-NULL output rows untouched
+ * pInputs     input operands; exactly one is expected
+ * numInputs   must be 1 (asserted)
+ * pOutput     destination operand
+ * order       unused
+ */
+void vectorLowerUpperTrimFunc(int16_t functionId, tExprOperandInfo *pInputs, int32_t numInputs, tExprOperandInfo* pOutput, int32_t order) {
+  assert(numInputs == 1);
+  assert(pInputs[0].numOfRows == 1 || pInputs[0].numOfRows == pOutput->numOfRows);
+
+  const int16_t inputType = pInputs[0].type;
+  const bool    isBinary = (inputType == TSDB_DATA_TYPE_BINARY);
+  const int32_t charSize = isBinary ? 1 : (int32_t)TSDB_NCHAR_SIZE;
+
+  for (int i = 0; i < pOutput->numOfRows; ++i) {
+    /* With exactly one input there is no need for a heap-allocated pointer
+     * table; a one-row input is a constant reused for every output row. */
+    char* input = pInputs[0].data;
+    if (pInputs[0].numOfRows != 1) {
+      input += i * pInputs[0].bytes;
+    }
+    char* output = pOutput->data + i * pOutput->bytes;
+
+    if (isNull(input, pInputs[0].type)) {
+      setNull(output, pOutput->type, pOutput->bytes);
+      continue;
+    }
+
+    const int32_t len = varDataLen(input);
+
+    switch (functionId) {
+      case TSDB_FUNC_SCALAR_LOWER:
+      case TSDB_FUNC_SCALAR_UPPER: {
+        const bool toLower = (functionId == TSDB_FUNC_SCALAR_LOWER);
+        varDataSetLen(output, len);
+
+        if (inputType == TSDB_DATA_TYPE_BINARY) {
+          /* <ctype.h> functions require a value representable as unsigned
+           * char; reading through unsigned char keeps bytes >= 0x80 from
+           * becoming negative arguments where plain char is signed. */
+          const unsigned char* src = (const unsigned char*)varDataVal(input);
+          char* dst = (char*)varDataVal(output);
+          for (int32_t k = 0; k < len; ++k) {
+            dst[k] = (char)(toLower ? tolower(src[k]) : toupper(src[k]));
+          }
+        } else if (inputType == TSDB_DATA_TYPE_NCHAR) {
+          const uint32_t* src = (const uint32_t*)varDataVal(input);
+          uint32_t* dst = (uint32_t*)varDataVal(output);
+          const int32_t numChars = len / charSize;
+          for (int32_t k = 0; k < numChars; ++k) {
+            dst[k] = toLower ? towlower(src[k]) : towupper(src[k]);
+          }
+        }
+        break;
+      }
+
+      case TSDB_FUNC_SCALAR_LTRIM: {
+        const int32_t numChars = len / charSize;
+
+        /* Count leading whitespace characters. */
+        int32_t skip = 0;
+        if (isBinary) {
+          const unsigned char* src = (const unsigned char*)varDataVal(input);
+          while (skip < numChars && isspace(src[skip])) {
+            ++skip;
+          }
+        } else {
+          const uint32_t* src = (const uint32_t*)varDataVal(input);
+          while (skip < numChars && iswspace(src[skip])) {
+            ++skip;
+          }
+        }
+
+        const int32_t resultBytes = (numChars - skip) * charSize;
+        varDataSetLen(output, resultBytes);
+        memcpy((char*)varDataVal(output), (char*)varDataVal(input) + skip * charSize, resultBytes);
+        break;
+      }
+
+      case TSDB_FUNC_SCALAR_RTRIM: {
+        const int32_t numChars = len / charSize;
+
+        /* Shrink the kept prefix while its last character is whitespace. */
+        int32_t keep = numChars;
+        if (isBinary) {
+          const unsigned char* src = (const unsigned char*)varDataVal(input);
+          while (keep > 0 && isspace(src[keep - 1])) {
+            --keep;
+          }
+        } else {
+          const uint32_t* src = (const uint32_t*)varDataVal(input);
+          while (keep > 0 && iswspace(src[keep - 1])) {
+            --keep;
+          }
+        }
+
+        const int32_t resultBytes = keep * charSize;
+        varDataSetLen(output, resultBytes);
+        memcpy((char*)varDataVal(output), (char*)varDataVal(input), resultBytes);
+        break;
+      }
+
+      default: {
+        break;
+      }
+    }
+  }
+}
+
+/*
+ * Scalar kernel for SUBSTR(str, pos [, len]).
+ *
+ * pInputs[0] is the string operand (one row reused for every output row, or
+ * one row per output row). pInputs[1] is the 1-based start position; a
+ * non-positive position is added to the end of the string, so negative
+ * values count back from the end and 0 selects the empty suffix.
+ * pInputs[2], when present, is the maximum number of characters to return;
+ * a non-positive length yields an empty string. A NULL string row yields a
+ * NULL output row.
+ *
+ * Position and length are read once, from the first row of their operands.
+ * NOTE(review): this presumes both are constants and non-NULL - confirm
+ * against the caller/validator.
+ *
+ * functionId  unused
+ * numInputs   2 or 3
+ * order       unused
+ */
+void vectorSubstrFunc(int16_t functionId, tExprOperandInfo *pInputs, int32_t numInputs, tExprOperandInfo* pOutput, int32_t order) {
+  /* pInputs[1] is read unconditionally below, so the arity must be 2 or 3. */
+  assert(numInputs == 2 || numInputs == 3);
+
+  int32_t subPosChar = 0;
+  GET_TYPED_DATA(subPosChar, int32_t, pInputs[1].type, pInputs[1].data);
+
+  int32_t subLenChar = INT16_MAX;
+  if (numInputs == 3) {
+    GET_TYPED_DATA(subLenChar, int32_t, pInputs[2].type, pInputs[2].data);
+  }
+
+  /* Bytes per character: 1 for BINARY, TSDB_NCHAR_SIZE otherwise. Kept as
+   * int64_t so the char -> byte conversions below cannot overflow or be
+   * truncated for large user-supplied positions and lengths. */
+  const int64_t charSize = (pInputs[0].type == TSDB_DATA_TYPE_BINARY) ? 1 : (int64_t)TSDB_NCHAR_SIZE;
+
+  for (int32_t i = 0; i < pOutput->numOfRows; ++i) {
+    char* inputData = pInputs[0].data;
+    if (pInputs[0].numOfRows != 1) {
+      inputData += i * pInputs[0].bytes;
+    }
+    char* outputData = pOutput->data + i * pOutput->bytes;
+
+    if (isNull(inputData, pInputs[0].type)) {
+      setNull(outputData, pOutput->type, pOutput->bytes);
+      continue;
+    }
+
+    const int16_t strBytes = varDataLen(inputData);
+
+    /* Byte offset of the first returned character, clamped to [0, strBytes]. */
+    int64_t startBytes;
+    if (subPosChar > 0) {
+      startBytes = ((int64_t)subPosChar - 1) * charSize;
+      if (startBytes > strBytes) {
+        startBytes = strBytes;
+      }
+    } else {
+      startBytes = (int64_t)strBytes + (int64_t)subPosChar * charSize;
+      if (startBytes < 0) {
+        startBytes = 0;
+      }
+    }
+
+    /* Requested length in bytes, limited to what remains after the start. */
+    int64_t lenBytes = (subLenChar > 0) ? (int64_t)subLenChar * charSize : 0;
+    const int64_t remainingBytes = (int64_t)strBytes - startBytes;
+    if (lenBytes > remainingBytes) {
+      lenBytes = remainingBytes;
+    }
+
+    /* Both values are now within [0, strBytes], so narrowing is safe. */
+    const int32_t resultStartBytes = (int32_t)startBytes;
+    const int32_t resultLenBytes = (int32_t)lenBytes;
+
+    varDataSetLen(outputData, resultLenBytes);
+    if (resultLenBytes > 0) {
+      memcpy((char*)varDataVal(outputData), (char*)varDataVal(inputData) + resultStartBytes, resultLenBytes);
+    }
+  }
+}
+
 void vectorMathFunc(int16_t functionId, tExprOperandInfo *pInputs, int32_t numInputs, tExprOperandInfo* pOutput, int32_t order)  {
  for (int i = 0; i < numInputs; ++i) {
    assert(pInputs[i].numOfRows == 1 || pInputs[i].numOfRows == pOutput->numOfRows);
@@ -1629,7 +1854,7 @@ void vectorMathFunc(int16_t functionId, tExprOperandInfo *pInputs, int32_t numIn
 _expr_scalar_function_t getExprScalarFunction(uint16_t funcId) {
  assert(TSDB_FUNC_IS_SCALAR(funcId));
  int16_t scalaIdx = TSDB_FUNC_SCALAR_INDEX(funcId);
-  assert(scalaIdx>=0 && scalaIdx <= TSDB_FUNC_SCALAR_MAX_NUM);
+  /* aScalarFunctions has TSDB_FUNC_SCALAR_NUM_FUNCTIONS entries, so the
+   * largest valid index is one less than that count; the bound is strict. */
+  assert(scalaIdx>=0 && scalaIdx < TSDB_FUNC_SCALAR_NUM_FUNCTIONS);
  return aScalarFunctions[scalaIdx].scalarFunc;
 }

@@ -1724,4 +1949,30 @@ tScalarFunctionInfo aScalarFunctions[] = {
        "cast",
        vectorMathFunc
    },
+    {
+        TSDB_FUNC_SCALAR_LOWER,
+        "lower",
+        vectorLowerUpperTrimFunc
+    },
+    {
+        TSDB_FUNC_SCALAR_UPPER,
+        "upper",
+        vectorLowerUpperTrimFunc
+    },
+    {
+        TSDB_FUNC_SCALAR_LTRIM,
+        "ltrim",
+        vectorLowerUpperTrimFunc
+    },
+    {
+        TSDB_FUNC_SCALAR_RTRIM,
+        "rtrim",
+        vectorLowerUpperTrimFunc
+    },
+    {
+        TSDB_FUNC_SCALAR_SUBSTR,
+        "substr",
+        vectorSubstrFunc
+    },
+
 };
--- a/src/query/src/qAggMain.c
+++ b/src/query/src/qAggMain.c
@@ -513,7 +513,7 @@ int32_t getResultDataInfo(int32_t dataType, int32_t dataBytes, int32_t functionI
 // TODO use hash table
 int32_t isValidFunction(const char* name, int32_t len) {

-  for (int32_t i = 0; i < TSDB_FUNC_SCALAR_MAX_NUM; ++i) {
+  for (int32_t i = 0; i < TSDB_FUNC_SCALAR_NUM_FUNCTIONS; ++i) {
    int32_t nameLen = (int32_t) strlen(aScalarFunctions[i].name);
    if (len != nameLen) {
      continue;