From a2ebeda89c2ec4309baac4c9d04edd5c8a568b3b Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 4 Jun 2022 19:28:30 +0800 Subject: [PATCH] feat: add sort/group logic for json --- include/common/tdatablock.h | 2 ++ source/common/src/tdatablock.c | 32 ++++++++++++--------- source/libs/executor/src/groupoperator.c | 36 +++++++++++++++++++++--- source/libs/index/src/indexFilter.c | 16 ++++------- source/util/src/tcompare.c | 29 +++++++++++++++++++ tests/system-test/2-query/json_tag.py | 21 +++++++------- 6 files changed, 97 insertions(+), 39 deletions(-) diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index 66b81efc5b..cce763cafe 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -186,6 +186,8 @@ static FORCE_INLINE void colDataAppendDouble(SColumnInfoData* pColumnInfoData, u *(double*)p = *(double*)v; } +int32_t getJsonValueLen(const char *data); + int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t currentRow, const char* pData, bool isNull); int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, uint32_t numOfRow1, int32_t* capacity, const SColumnInfoData* pSource, uint32_t numOfRow2); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index f77b823f3c..055992db53 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -99,6 +99,24 @@ void colDataTrim(SColumnInfoData* pColumnInfoData) { // TODO } +int32_t getJsonValueLen(const char *data) { + int32_t dataLen = 0; + if (*data == TSDB_DATA_TYPE_NULL) { + dataLen = CHAR_BYTES; + } else if (*data == TSDB_DATA_TYPE_NCHAR) { + dataLen = varDataTLen(data + CHAR_BYTES) + CHAR_BYTES; + } else if (*data == TSDB_DATA_TYPE_DOUBLE) { + dataLen = DOUBLE_BYTES + CHAR_BYTES; + } else if (*data == TSDB_DATA_TYPE_BOOL) { + dataLen = CHAR_BYTES + CHAR_BYTES; + } else if (*data == TD_TAG_JSON) { // json string + dataLen = ((STag*)(data))->len; + } else { + ASSERT(0); + } + return dataLen; +} + int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t currentRow, const char* pData, bool isNull) { ASSERT(pColumnInfoData != NULL); @@ -118,19 +136,7 @@ int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t currentRow, con if (IS_VAR_DATA_TYPE(type)) { int32_t dataLen = 0; if (type == TSDB_DATA_TYPE_JSON) { - if (*pData == TSDB_DATA_TYPE_NULL) { - dataLen = CHAR_BYTES; - } else if (*pData == TSDB_DATA_TYPE_NCHAR) { - dataLen = varDataTLen(pData + CHAR_BYTES) + CHAR_BYTES; - } else if (*pData == TSDB_DATA_TYPE_DOUBLE) { - dataLen = DOUBLE_BYTES + CHAR_BYTES; - } else if (*pData == TSDB_DATA_TYPE_BOOL) { - dataLen = CHAR_BYTES + CHAR_BYTES; - } else if (*pData == TD_TAG_JSON) { // json string - dataLen = ((STag*)(pData))->len; - } else { - ASSERT(0); - } + dataLen = getJsonValueLen(pData); }else { dataLen = varDataTLen(pData); } diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 3691024096..0a6b3149ef 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -93,7 +93,15 @@ static bool groupKeyCompare(SArray* pGroupCols, SArray* pGroupColVals, SSDataBlo char* val = colDataGetData(pColInfoData, rowIndex); - if (IS_VAR_DATA_TYPE(pkey->type)) { + if (pkey->type == TSDB_DATA_TYPE_JSON) { + int32_t dataLen = getJsonValueLen(val); + + if (memcmp(pkey->pData, val, dataLen) == 0){ + continue; + } else { + return false; + } + } else if (IS_VAR_DATA_TYPE(pkey->type)) { int32_t len = varDataLen(val); if (len == varDataLen(pkey->pData) && memcmp(varDataVal(pkey->pData), varDataVal(val), len) == 0) { continue; @@ -129,7 +137,10 @@ void recordNewGroupKeys(SArray* pGroupCols, SArray* pGroupColVals, SSDataBlock* } else { pkey->isNull = false; char* val = colDataGetData(pColInfoData, rowIndex); - if (IS_VAR_DATA_TYPE(pkey->type)) { + if (pkey->type == TSDB_DATA_TYPE_JSON) { + int32_t dataLen = getJsonValueLen(val); + memcpy(pkey->pData, val, dataLen); + } else if (IS_VAR_DATA_TYPE(pkey->type)) { memcpy(pkey->pData, val, varDataTLen(val)); ASSERT(varDataTLen(val) <= pkey->bytes); } else { @@ -153,7 +164,11 @@ int32_t buildGroupKeys(void* pKey, const SArray* pGroupColVals) { } isNull[i] = 0; - if (IS_VAR_DATA_TYPE(pkey->type)) { + if (pkey->type == TSDB_DATA_TYPE_JSON) { + int32_t dataLen = getJsonValueLen(pkey->pData); + memcpy(pStart, (pkey->pData), dataLen); + pStart += dataLen; + } else if (IS_VAR_DATA_TYPE(pkey->type)) { varDataCopy(pStart, pkey->pData); pStart += varDataTLen(pkey->pData); ASSERT(varDataTLen(pkey->pData) <= pkey->bytes); @@ -178,7 +193,10 @@ static void doAssignGroupKeys(SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t char* dest = GET_ROWCELL_INTERBUF(pEntryInfo); char* data = colDataGetData(pColInfoData, rowIndex); - if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + if (pColInfoData->info.type == TSDB_DATA_TYPE_JSON) { + int32_t dataLen = getJsonValueLen(data); + memcpy(dest, data, dataLen); + } else if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { varDataCopy(dest, data); } else { memcpy(dest, data, pColInfoData->info.bytes); @@ -447,6 +465,16 @@ static void doHashPartition(SOperatorInfo* pOperator, SSDataBlock* pBlock) { if (colDataIsNull_s(pColInfoData, j)) { offset[(*rows)] = -1; contentLen = 0; + } else if(pColInfoData->info.type == TSDB_DATA_TYPE_JSON){ + offset[*rows] = (*columnLen); + char* src = colDataGetData(pColInfoData, j); + int32_t dataLen = getJsonValueLen(src); + + memcpy(data + (*columnLen), src, dataLen); + int32_t v = (data + (*columnLen) + dataLen - (char*)pPage); + ASSERT(v > 0); + + contentLen = dataLen; } else { offset[*rows] = (*columnLen); char* src = colDataGetData(pColInfoData, j); diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index b41006b6dd..9f5fa4cb27 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -140,7 +140,7 @@ static int32_t sifGetValueFromNode(SNode *node, char **value) { dataLen = 0; } else if (*pData == TSDB_DATA_TYPE_NCHAR) { dataLen = varDataTLen(pData + CHAR_BYTES); - } else if (*pData == TSDB_DATA_TYPE_BIGINT || *pData == TSDB_DATA_TYPE_DOUBLE) { + } else if (*pData == TSDB_DATA_TYPE_DOUBLE) { dataLen = LONG_BYTES; } else if (*pData == TSDB_DATA_TYPE_BOOL) { dataLen = CHAR_BYTES; @@ -457,10 +457,6 @@ static int32_t sifGetOperFn(int32_t funcId, sif_func_t *func, SIdxFltStatus *sta static int32_t sifExecOper(SOperatorNode *node, SIFCtx *ctx, SIFParam *output) { int32_t code = 0; int32_t nParam = sifGetOperParamNum(node->opType); - if (nParam <= 1) { - SIF_ERR_JRET(TSDB_CODE_QRY_INVALID_INPUT); - } - SIFParam *params = NULL; SIF_ERR_RET(sifInitOperParams(¶ms, node, ctx)); @@ -469,14 +465,12 @@ static int32_t sifExecOper(SOperatorNode *node, SIFCtx *ctx, SIFParam *output) { sif_func_t operFn = sifNullFunc; code = sifGetOperFn(node->opType, &operFn, &output->status); - if (ctx->noExec) { - SIF_RET(code); - } else { - return operFn(¶ms[0], nParam > 1 ? ¶ms[1] : NULL, output); + if (!ctx->noExec) { + code = operFn(¶ms[0], nParam > 1 ? ¶ms[1] : NULL, output); } -_return: + taosMemoryFree(params); - SIF_RET(code); + return code; } static int32_t sifExecLogic(SLogicConditionNode *node, SIFCtx *ctx, SIFParam *output) { diff --git a/source/util/src/tcompare.c b/source/util/src/tcompare.c index 7f1fdf3306..b660927899 100644 --- a/source/util/src/tcompare.c +++ b/source/util/src/tcompare.c @@ -227,6 +227,33 @@ int32_t compareJsonContainsKey(const void* pLeft, const void* pRight) { return 1; } +// string > number > bool > null +// ref: https://dev.mysql.com/doc/refman/8.0/en/json.html#json-comparison +int32_t compareJsonVal(const void *pLeft, const void *pRight) { + char leftType = *(char*)pLeft; + char rightType = *(char*)pRight; + if(leftType != rightType){ + return leftType > rightType ? 1 : -1; + } + + char* realDataLeft = POINTER_SHIFT(pLeft, CHAR_BYTES); + char* realDataRight = POINTER_SHIFT(pRight, CHAR_BYTES); + if(leftType == TSDB_DATA_TYPE_BOOL) { + DEFAULT_COMP(GET_INT8_VAL(realDataLeft), GET_INT8_VAL(realDataRight)); + }else if(leftType == TSDB_DATA_TYPE_DOUBLE){ + DEFAULT_DOUBLE_COMP(GET_DOUBLE_VAL(realDataLeft), GET_DOUBLE_VAL(realDataRight)); + }else if(leftType == TSDB_DATA_TYPE_NCHAR){ + return compareLenPrefixedWStr(realDataLeft, realDataRight); + }else if(leftType == TSDB_DATA_TYPE_NULL) { + return 0; + }else{ + assert(0); + } +} + +int32_t compareJsonValDesc(const void *pLeft, const void *pRight) { + return compareJsonVal(pRight, pLeft); +} /* * Compare two strings * TSDB_MATCH: Match @@ -601,6 +628,8 @@ __compar_fn_t getKeyComparFunc(int32_t keyType, int32_t order) { return (order == TSDB_ORDER_ASC) ? compareLenPrefixedStr : compareLenPrefixedStrDesc; case TSDB_DATA_TYPE_NCHAR: return (order == TSDB_ORDER_ASC) ? compareLenPrefixedWStr : compareLenPrefixedWStrDesc; + case TSDB_DATA_TYPE_JSON: + return (order == TSDB_ORDER_ASC) ? compareJsonVal : compareJsonValDesc; default: return (order == TSDB_ORDER_ASC) ? compareInt32Val : compareInt32ValDesc; } diff --git a/tests/system-test/2-query/json_tag.py b/tests/system-test/2-query/json_tag.py index 486abf9f40..f6a32cbe95 100644 --- a/tests/system-test/2-query/json_tag.py +++ b/tests/system-test/2-query/json_tag.py @@ -120,12 +120,12 @@ class TDTestCase: tdSql.error("select * from jsons1 where jtag contains 'location'='beijing'") # # # test function error - # tdSql.error("select avg(jtag->'tag1') from jsons1") - # tdSql.error("select avg(jtag) from jsons1") - # tdSql.error("select min(jtag->'tag1') from jsons1") - # tdSql.error("select min(jtag) from jsons1") - # tdSql.error("select ceil(jtag->'tag1') from jsons1") - # tdSql.error("select ceil(jtag) from jsons1") + tdSql.error("select avg(jtag->'tag1') from jsons1") + tdSql.error("select avg(jtag) from jsons1") + tdSql.error("select min(jtag->'tag1') from jsons1") + tdSql.error("select min(jtag) from jsons1") + tdSql.error("select ceil(jtag->'tag1') from jsons1") + tdSql.error("select ceil(jtag) from jsons1") # # # test select normal column tdSql.query("select dataint from jsons1") @@ -176,7 +176,6 @@ class TDTestCase: tdSql.checkColNameList(res, cname_list) - # # test where with json tag # tdSql.error("select * from jsons1_1 where jtag is not null") # tdSql.error("select * from jsons1 where jtag='{\"tag1\":11,\"tag2\":\"\"}'") @@ -313,8 +312,8 @@ class TDTestCase: # tdSql.checkRows(2) # # # test with tbname/normal column - # tdSql.query("select * from jsons1 where tbname = 'jsons1_1'") - # tdSql.checkRows(2) + tdSql.query("select * from jsons1 where tbname = 'jsons1_1'") + tdSql.checkRows(2) # tdSql.query("select * from jsons1 where tbname = 'jsons1_1' and jtag contains 'tag3'") # tdSql.checkRows(2) # tdSql.query("select * from jsons1 where tbname = 'jsons1_1' and jtag contains 'tag3' and dataint=3") @@ -345,14 +344,14 @@ class TDTestCase: # tdSql.checkRows(1) # # # test distinct - # tdSql.execute("insert into jsons1_14 using jsons1 tags('{\"tag1\":\"收到货\",\"tag2\":\"\",\"tag3\":null}') values(1591062628000, 2, NULL, '你就会', 'dws')") + tdSql.execute("insert into jsons1_14 using jsons1 tags('{\"tag1\":\"收到货\",\"tag2\":\"\",\"tag3\":null}') values(1591062628000, 2, NULL, '你就会', 'dws')") # tdSql.query("select distinct jtag->'tag1' from jsons1") # tdSql.checkRows(8) # tdSql.query("select distinct jtag from jsons1") # tdSql.checkRows(9) # # #test dumplicate key with normal colomn - # tdSql.execute("INSERT INTO jsons1_15 using jsons1 tags('{\"tbname\":\"tt\",\"databool\":true,\"datastr\":\"是是是\"}') values(1591060828000, 4, false, 'jjsf', \"你就会\")") + tdSql.execute("INSERT INTO jsons1_15 using jsons1 tags('{\"tbname\":\"tt\",\"databool\":true,\"datastr\":\"是是是\"}') values(1591060828000, 4, false, 'jjsf', \"你就会\")") # tdSql.query("select *,tbname,jtag from jsons1 where jtag->'datastr' match '是' and datastr match 'js'") # tdSql.checkRows(1) # tdSql.query("select tbname,jtag->'tbname' from jsons1 where jtag->'tbname'='tt' and tbname='jsons1_14'") -- GitLab