diff --git a/src/client/inc/tscSecondaryMerge.h b/src/client/inc/tscSecondaryMerge.h index 2f47fe36d582babbe2b5ff01d3aaa4cb4c411aaf..ce3f4091d566f0d339dc940378ac6dcf7f1991fd 100644 --- a/src/client/inc/tscSecondaryMerge.h +++ b/src/client/inc/tscSecondaryMerge.h @@ -37,13 +37,13 @@ extern "C" { struct SQLFunctionCtx; -typedef struct SLocalDataSrc { +typedef struct SLocalDataSource { tExtMemBuffer *pMemBuffer; int32_t flushoutIdx; int32_t pageId; int32_t rowIdx; tFilePage filePage; -} SLocalDataSrc; +} SLocalDataSource; enum { TSC_LOCALREDUCE_READY = 0x0, @@ -52,7 +52,7 @@ enum { }; typedef struct SLocalReducer { - SLocalDataSrc **pLocalDataSrc; + SLocalDataSource **pLocalDataSrc; int32_t numOfBuffer; int32_t numOfCompleted; diff --git a/src/client/inc/tscUtil.h b/src/client/inc/tscUtil.h index 7d470c35a4d98c3b8b94e14544af6225ac00f1e3..48afd1802cdf20b9bb4e59317787f045644d44d4 100644 --- a/src/client/inc/tscUtil.h +++ b/src/client/inc/tscUtil.h @@ -41,21 +41,24 @@ typedef struct SParsedColElem { } SParsedColElem; typedef struct SParsedDataColInfo { - bool ordered; // denote if the timestamp in one data block ordered or not int16_t numOfCols; int16_t numOfAssignedCols; SParsedColElem elems[TSDB_MAX_COLUMNS]; bool hasVal[TSDB_MAX_COLUMNS]; - int64_t prevTimestamp; } SParsedDataColInfo; -SInsertedDataBlocks* tscCreateDataBlock(int32_t size); -void tscDestroyDataBlock(SInsertedDataBlocks** pDataBlock); +STableDataBlocks* tscCreateDataBlock(int32_t size); +void tscDestroyDataBlock(STableDataBlocks* pDataBlock); +void tscAppendDataBlock(SDataBlockList* pList, STableDataBlocks* pBlocks); SDataBlockList* tscCreateBlockArrayList(); -void tscDestroyBlockArrayList(SDataBlockList** pList); -int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, SInsertedDataBlocks* pDataBlock); +void* tscDestroyBlockArrayList(SDataBlockList* pList); +int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, STableDataBlocks* pDataBlock); void tscFreeUnusedDataBlocks(SDataBlockList* pList); +void tscMergeTableDataBlocks(SSqlCmd* pCmd, SDataBlockList* pDataList); +STableDataBlocks* tscGetDataBlockFromList(void* pHashList, SDataBlockList* pDataBlockList, int64_t id, int32_t size, + int32_t startOffset, int32_t rowSize, char* tableId); +STableDataBlocks* tscCreateDataBlockEx(size_t size, int32_t rowSize, int32_t startOffset, char* name); SVnodeSidList* tscGetVnodeSidList(SMetricMeta* pMetricmeta, int32_t vnodeIdx); SMeterSidExtInfo* tscGetMeterSidInfo(SVnodeSidList* pSidList, int32_t idx); @@ -66,8 +69,7 @@ bool tscIsTwoStageMergeMetricQuery(SSqlObj* pSql); /** * * for the projection query on metric or point interpolation query on metric, - * we iterate all the meters, instead of invoke query on all qualified meters - * simultaneously. + * we iterate all the meters, instead of invoke query on all qualified meters simultaneously. * * @param pSql sql object * @return @@ -124,8 +126,7 @@ void tscIncStreamExecutionCount(void* pStream); bool tscValidateColumnId(SSqlCmd* pCmd, int32_t colId); -// get starter position of metric query condition (query on tags) in -// SSqlCmd.payload +// get starter position of metric query condition (query on tags) in SSqlCmd.payload char* tsGetMetricQueryCondPos(STagCond* pCond); void tscTagCondAssign(STagCond* pDst, STagCond* pSrc); void tscTagCondRelease(STagCond* pCond); @@ -139,6 +140,7 @@ void tscCleanSqlCmd(SSqlCmd* pCmd); bool tscShouldFreeAsyncSqlObj(SSqlObj* pSql); void tscDoQuery(SSqlObj* pSql); +int32_t sortRemoveDuplicates(STableDataBlocks* dataBuf, int32_t numOfRows); #ifdef __cplusplus } #endif diff --git a/src/client/inc/tsclient.h b/src/client/inc/tsclient.h index 0c244e0ce9514e3a5bac70bf709c892001b615f2..5ed0550cd8264eae79eed93cbd69d7ee6d56a6eb 100644 --- a/src/client/inc/tsclient.h +++ b/src/client/inc/tsclient.h @@ -169,16 +169,22 @@ typedef struct STagCond { char * pData; } STagCond; -typedef struct SInsertedDataBlocks { - char meterId[TSDB_METER_ID_LEN]; - int64_t size; - uint32_t nAllocSize; - uint32_t numOfMeters; +typedef struct STableDataBlocks { + char meterId[TSDB_METER_ID_LEN]; + int64_t vgid; + int64_t size; + + int64_t prevTS; + bool ordered; + + int32_t numOfMeters; + int32_t rowSize; + uint32_t nAllocSize; union { char *filename; char *pData; }; -} SInsertedDataBlocks; +} STableDataBlocks; typedef struct SDataBlockList { int32_t idx; @@ -186,7 +192,7 @@ typedef struct SDataBlockList { int32_t nAlloc; char * userParam; /* user assigned parameters for async query */ void * udfp; /* user defined function pointer, used in async model */ - SInsertedDataBlocks **pData; + STableDataBlocks **pData; } SDataBlockList; typedef struct { diff --git a/src/client/src/tscAsync.c b/src/client/src/tscAsync.c index 660db08030879339a276a71019ca56f68ad34236..925a3f3c84609429ba986ac2aa91085a78d12c4f 100644 --- a/src/client/src/tscAsync.c +++ b/src/client/src/tscAsync.c @@ -410,7 +410,7 @@ void tscAsyncInsertMultiVnodesProxy(void *param, TAOS_RES *tres, int numOfRows) tscTrace("%p Async insertion completed, destroy data block list", pSql); // release data block data - tscDestroyBlockArrayList(&pCmd->pDataBlocks); + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); // all data has been sent to vnode, call user function (*pSql->fp)(pSql->param, tres, numOfRows); diff --git a/src/client/src/tscParseInsert.c b/src/client/src/tscParseInsert.c index a8cd14cd67bbc0d59596e87bc4b31482575a06e2..44a254f1bd018415e158912950b17388af5d9633 100644 --- a/src/client/src/tscParseInsert.c +++ b/src/client/src/tscParseInsert.c @@ -53,11 +53,11 @@ return TSDB_CODE_INVALID_SQL; \ } while (0) -static void setErrMsg(char* msg, char* sql); -static int32_t tscAllocateMemIfNeed(SInsertedDataBlocks* pDataBlock, int32_t rowSize); +static void setErrMsg(char *msg, char *sql); +static int32_t tscAllocateMemIfNeed(STableDataBlocks *pDataBlock, int32_t rowSize); // get formation -static int32_t getNumericType(const char* data) { +static int32_t getNumericType(const char *data) { if (*data == '-' || *data == '+') { data += 1; } @@ -73,7 +73,7 @@ static int32_t getNumericType(const char* data) { } } -static int64_t tscToInteger(char* data, char** endPtr) { +static int64_t tscToInteger(char *data, char **endPtr) { int32_t numType = getNumericType(data); int32_t radix = 10; @@ -86,14 +86,14 @@ static int64_t tscToInteger(char* data, char** endPtr) { return strtoll(data, endPtr, radix); } -int tsParseTime(char* value, int32_t valuelen, int64_t* time, char** next, char* error, int16_t timePrec) { - char* token; +int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char *error, int16_t timePrec) { + char * token; int tokenlen; int64_t interval; int64_t useconds = 0; - char* pTokenEnd = *next; + char *pTokenEnd = *next; tscGetToken(pTokenEnd, &token, &tokenlen); if (tokenlen == 0 && strlen(value) == 0) { INVALID_SQL_RET_MSG(error, "missing time stamp"); @@ -135,9 +135,9 @@ int tsParseTime(char* value, int32_t valuelen, int64_t* time, char** next, char* } /* - * time expression: - * e.g., now+12a, now-5h - */ + * time expression: + * e.g., now+12a, now-5h + */ pTokenEnd = tscGetToken(pTokenEnd, &token, &tokenlen); if (tokenlen && (*token == '+' || *token == '-')) { pTokenEnd = tscGetToken(pTokenEnd, &value, &valuelen); @@ -166,32 +166,32 @@ int tsParseTime(char* value, int32_t valuelen, int64_t* time, char** next, char* return TSDB_CODE_SUCCESS; } -int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* payload, char* msg, char** str, +int32_t tsParseOneColumnData(SSchema *pSchema, char *value, int valuelen, char *payload, char *msg, char **str, bool primaryKey, int16_t timePrec) { int64_t temp; - int32_t nullInt = *(int32_t*)TSDB_DATA_NULL_STR_L; - char* endptr = NULL; + int32_t nullInt = *(int32_t *)TSDB_DATA_NULL_STR_L; + char * endptr = NULL; errno = 0; // reset global error code switch (pSchema->type) { case TSDB_DATA_TYPE_BOOL: { // bool - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *(uint8_t*)payload = TSDB_DATA_BOOL_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *(uint8_t *)payload = TSDB_DATA_BOOL_NULL; } else { if (strncmp(value, "true", valuelen) == 0) { - *(uint8_t*)payload = TSDB_TRUE; + *(uint8_t *)payload = TSDB_TRUE; } else if (strncmp(value, "false", valuelen) == 0) { - *(uint8_t*)payload = TSDB_FALSE; + *(uint8_t *)payload = TSDB_FALSE; } else { int64_t v = strtoll(value, NULL, 10); - *(uint8_t*)payload = (int8_t)((v == 0) ? TSDB_FALSE : TSDB_TRUE); + *(uint8_t *)payload = (int8_t)((v == 0) ? TSDB_FALSE : TSDB_TRUE); } } break; } case TSDB_DATA_TYPE_TINYINT: - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *((int32_t*)payload) = TSDB_DATA_TINYINT_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *((int32_t *)payload) = TSDB_DATA_TINYINT_NULL; } else { int64_t v = tscToInteger(value, &endptr); if (errno == ERANGE || v > INT8_MAX || v < INT8_MIN) { @@ -199,18 +199,18 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* } int8_t v8 = (int8_t)v; - if (isNull((char*)&v8, pSchema->type)) { + if (isNull((char *)&v8, pSchema->type)) { INVALID_SQL_RET_MSG(msg, "data is overflow"); } - *((int8_t*)payload) = v8; + *((int8_t *)payload) = v8; } break; case TSDB_DATA_TYPE_SMALLINT: - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *((int32_t*)payload) = TSDB_DATA_SMALLINT_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *((int32_t *)payload) = TSDB_DATA_SMALLINT_NULL; } else { int64_t v = tscToInteger(value, &endptr); @@ -219,17 +219,17 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* } int16_t v16 = (int16_t)v; - if (isNull((char*)&v16, pSchema->type)) { + if (isNull((char *)&v16, pSchema->type)) { INVALID_SQL_RET_MSG(msg, "data is overflow"); } - *((int16_t*)payload) = v16; + *((int16_t *)payload) = v16; } break; case TSDB_DATA_TYPE_INT: - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *((int32_t*)payload) = TSDB_DATA_INT_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *((int32_t *)payload) = TSDB_DATA_INT_NULL; } else { int64_t v = tscToInteger(value, &endptr); @@ -238,36 +238,36 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* } int32_t v32 = (int32_t)v; - if (isNull((char*)&v32, pSchema->type)) { + if (isNull((char *)&v32, pSchema->type)) { INVALID_SQL_RET_MSG(msg, "data is overflow"); } - *((int32_t*)payload) = v32; + *((int32_t *)payload) = v32; } break; case TSDB_DATA_TYPE_BIGINT: - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *((int64_t*)payload) = TSDB_DATA_BIGINT_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *((int64_t *)payload) = TSDB_DATA_BIGINT_NULL; } else { int64_t v = tscToInteger(value, &endptr); - if (isNull((char*)&v, pSchema->type) || errno == ERANGE) { + if (isNull((char *)&v, pSchema->type) || errno == ERANGE) { INVALID_SQL_RET_MSG(msg, "data is overflow"); } - *((int64_t*)payload) = v; + *((int64_t *)payload) = v; } break; case TSDB_DATA_TYPE_FLOAT: - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *((int32_t*)payload) = TSDB_DATA_FLOAT_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *((int32_t *)payload) = TSDB_DATA_FLOAT_NULL; } else { float v = (float)strtod(value, &endptr); - if (isNull((char*)&v, pSchema->type) || isinf(v) || isnan(v)) { - *((int32_t*)payload) = TSDB_DATA_FLOAT_NULL; + if (isNull((char *)&v, pSchema->type) || isinf(v) || isnan(v)) { + *((int32_t *)payload) = TSDB_DATA_FLOAT_NULL; } else { - *((float*)payload) = v; + *((float *)payload) = v; } if (str != NULL) { @@ -280,14 +280,14 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* break; case TSDB_DATA_TYPE_DOUBLE: - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *((int64_t*)payload) = TSDB_DATA_DOUBLE_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *((int64_t *)payload) = TSDB_DATA_DOUBLE_NULL; } else { double v = strtod(value, &endptr); - if (isNull((char*)&v, pSchema->type) || isinf(v) || isnan(v)) { - *((int32_t*)payload) = TSDB_DATA_FLOAT_NULL; + if (isNull((char *)&v, pSchema->type) || isinf(v) || isnan(v)) { + *((int32_t *)payload) = TSDB_DATA_FLOAT_NULL; } else { - *((double*)payload) = v; + *((double *)payload) = v; } if (str != NULL) { @@ -300,8 +300,10 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* break; case TSDB_DATA_TYPE_BINARY: - // binary data cannot be null-terminated char string, otherwise the last char of the string is lost - if (valuelen == 4 && nullInt == *(int32_t*)value) { + /* + * binary data cannot be null-terminated char string, otherwise the last char of the string is lost + */ + if (valuelen == 4 && nullInt == *(int32_t *)value) { *payload = TSDB_DATA_BINARY_NULL; } else { /* truncate too long string */ @@ -312,8 +314,8 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* break; case TSDB_DATA_TYPE_NCHAR: - if (valuelen == 4 && nullInt == *(int32_t*)value) { - *(uint32_t*)payload = TSDB_DATA_NCHAR_NULL; + if (valuelen == 4 && nullInt == *(int32_t *)value) { + *(uint32_t *)payload = TSDB_DATA_NCHAR_NULL; } else { if (!taosMbsToUcs4(value, valuelen, payload, pSchema->bytes)) { sprintf(msg, "%s", strerror(errno)); @@ -323,17 +325,17 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* break; case TSDB_DATA_TYPE_TIMESTAMP: { - if (valuelen == 4 && nullInt == *(int32_t*)value) { + if (valuelen == 4 && nullInt == *(int32_t *)value) { if (primaryKey) { - *((int64_t*)payload) = 0; + *((int64_t *)payload) = 0; } else { - *((int64_t*)payload) = TSDB_DATA_BIGINT_NULL; + *((int64_t *)payload) = TSDB_DATA_BIGINT_NULL; } } else { if (tsParseTime(value, valuelen, &temp, str, msg, timePrec) != TSDB_CODE_SUCCESS) { return TSDB_CODE_INVALID_SQL; } - *((int64_t*)payload) = temp; + *((int64_t *)payload) = temp; } break; @@ -344,7 +346,7 @@ int32_t tsParseOneColumnData(SSchema* pSchema, char* value, int valuelen, char* } // todo merge the error msg function with tSQLParser -static void setErrMsg(char* msg, char* sql) { +static void setErrMsg(char *msg, char *sql) { const char* msgFormat = "near \"%s\" syntax error"; const int32_t BACKWARD_CHAR_STEP = 15; @@ -354,16 +356,18 @@ static void setErrMsg(char* msg, char* sql) { sprintf(msg, msgFormat, buf); } -int tsParseOneRowData(char** str, char* payload, SSchema schema[], SParsedDataColInfo* spd, char* error, +int tsParseOneRowData(char **str, STableDataBlocks *pDataBlocks, SSchema schema[], SParsedDataColInfo *spd, char *error, int16_t timePrec) { - char* value = NULL; + char *value = NULL; int valuelen = 0; + char *payload = pDataBlocks->pData + pDataBlocks->size; + /* 1. set the parsed value from sql string */ int32_t rowSize = 0; for (int i = 0; i < spd->numOfAssignedCols; ++i) { /* the start position in data block buffer of current value in sql */ - char* start = payload + spd->elems[i].offset; + char * start = payload + spd->elems[i].offset; int16_t colIndex = spd->elems[i].colIndex; rowSize += schema[colIndex].bytes; @@ -394,19 +398,19 @@ int tsParseOneRowData(char** str, char* payload, SSchema schema[], SParsedDataCo } // once the data block is disordered, we do NOT keep previous timestamp any more - if (colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX && spd->ordered) { - TSKEY k = *(TSKEY*)start; - if (k < spd->prevTimestamp) { - spd->ordered = false; + if (colIndex == PRIMARYKEY_TIMESTAMP_COL_INDEX && pDataBlocks->ordered) { + TSKEY k = *(TSKEY *)start; + if (k <= pDataBlocks->prevTS) { + pDataBlocks->ordered = false; } - spd->prevTimestamp = k; + pDataBlocks->prevTS = k; } } /*2. set the null value for the rest columns */ if (spd->numOfAssignedCols < spd->numOfCols) { - char* ptr = payload; + char *ptr = payload; for (int32_t i = 0; i < spd->numOfCols; ++i) { if (!spd->hasVal[i]) { @@ -423,39 +427,42 @@ int tsParseOneRowData(char** str, char* payload, SSchema schema[], SParsedDataCo return rowSize; } -static int32_t rowDataCompar(const void* lhs, const void* rhs) { - TSKEY left = GET_INT64_VAL(lhs); - TSKEY right = GET_INT64_VAL(rhs); - DEFAULT_COMP(left, right); +static int32_t rowDataCompar(const void *lhs, const void *rhs) { + TSKEY left = *(TSKEY *)lhs; + TSKEY right = *(TSKEY *)rhs; + + if (left == right) { + return 0; + } else { + return left > right ? 1 : -1; + } } -int tsParseValues(char** str, SInsertedDataBlocks* pDataBlock, SMeterMeta* pMeterMeta, int maxRows, - SParsedDataColInfo* spd, char* error) { - char* token; +int tsParseValues(char **str, STableDataBlocks *pDataBlock, SMeterMeta *pMeterMeta, int maxRows, + SParsedDataColInfo *spd, char *error) { + char *token; int tokenlen; - SSchema* pSchema = tsGetSchema(pMeterMeta); - int16_t numOfRows = 0; - pDataBlock->size += sizeof(SShellSubmitBlock); - if (!spd->hasVal[0]) { + SSchema *pSchema = tsGetSchema(pMeterMeta); + int32_t precision = pMeterMeta->precision; + + if (spd->hasVal[0] == false) { sprintf(error, "primary timestamp column can not be null"); return -1; } while (1) { - char* tmp = tscGetToken(*str, &token, &tokenlen); + char *tmp = tscGetToken(*str, &token, &tokenlen); if (tokenlen == 0 || *token != '(') break; *str = tmp; - if (numOfRows >= maxRows || - pDataBlock->size + pMeterMeta->rowSize + sizeof(SShellSubmitBlock) >= pDataBlock->nAllocSize) { + if (numOfRows >= maxRows || pDataBlock->size + pMeterMeta->rowSize >= pDataBlock->nAllocSize) { maxRows += tscAllocateMemIfNeed(pDataBlock, pMeterMeta->rowSize); } - int32_t len = - tsParseOneRowData(str, pDataBlock->pData + pDataBlock->size, pSchema, spd, error, pMeterMeta->precision); + int32_t len = tsParseOneRowData(str, pDataBlock, pSchema, spd, error, precision); if (len <= 0) { setErrMsg(error, *str); return -1; @@ -474,26 +481,25 @@ int tsParseValues(char** str, SInsertedDataBlocks* pDataBlock, SMeterMeta* pMete if (numOfRows <= 0) { strcpy(error, "no any data points"); + return -1; + } else { + return numOfRows; } - - return numOfRows; } -static void appendDataBlock(SDataBlockList* pList, SInsertedDataBlocks* pBlocks) { +void tscAppendDataBlock(SDataBlockList *pList, STableDataBlocks *pBlocks) { if (pList->nSize >= pList->nAlloc) { pList->nAlloc = pList->nAlloc << 1; - pList->pData = realloc(pList->pData, (size_t)pList->nAlloc); + pList->pData = realloc(pList->pData, sizeof(void *) * (size_t)pList->nAlloc); // reset allocated memory - memset(pList->pData + pList->nSize, 0, POINTER_BYTES * (pList->nAlloc - pList->nSize)); + memset(pList->pData + pList->nSize, 0, sizeof(void *) * (pList->nAlloc - pList->nSize)); } pList->pData[pList->nSize++] = pBlocks; } -static void tscSetAssignedColumnInfo(SParsedDataColInfo* spd, SSchema* pSchema, int16_t numOfCols) { - spd->ordered = true; - spd->prevTimestamp = INT64_MIN; +static void tscSetAssignedColumnInfo(SParsedDataColInfo *spd, SSchema *pSchema, int32_t numOfCols) { spd->numOfCols = numOfCols; spd->numOfAssignedCols = numOfCols; @@ -507,106 +513,118 @@ static void tscSetAssignedColumnInfo(SParsedDataColInfo* spd, SSchema* pSchema, } } -int32_t tscAllocateMemIfNeed(SInsertedDataBlocks* pDataBlock, int32_t rowSize) { +int32_t tscAllocateMemIfNeed(STableDataBlocks *pDataBlock, int32_t rowSize) { size_t remain = pDataBlock->nAllocSize - pDataBlock->size; + const int factor = 5; // expand the allocated size - if (remain <= sizeof(SShellSubmitBlock) + rowSize) { - int32_t oldSize = pDataBlock->nAllocSize; - - pDataBlock->nAllocSize = (uint32_t)(oldSize * 1.5); + while (remain < rowSize * factor) { + pDataBlock->nAllocSize = (uint32_t) (pDataBlock->nAllocSize * 1.5); + remain = pDataBlock->nAllocSize - pDataBlock->size; + } - char* tmp = realloc(pDataBlock->pData, (size_t)pDataBlock->nAllocSize); - if (tmp != NULL) { - pDataBlock->pData = tmp; - } else { - // do nothing - } + char *tmp = realloc(pDataBlock->pData, (size_t)pDataBlock->nAllocSize); + if (tmp != NULL) { + pDataBlock->pData = tmp; + memset(pDataBlock->pData + pDataBlock->size, 0, pDataBlock->nAllocSize - pDataBlock->size); + } else { + assert(false); + // do nothing } - return (int32_t)(pDataBlock->nAllocSize - pDataBlock->size - sizeof(SShellSubmitBlock)) / rowSize; + return (int32_t)(pDataBlock->nAllocSize - pDataBlock->size) / rowSize; } -void tsSetBlockInfo(SShellSubmitBlock* pBlocks, const SMeterMeta* pMeterMeta, int32_t numOfRows) { - pBlocks->sid = htonl(pMeterMeta->sid); - pBlocks->uid = htobe64(pMeterMeta->uid); - pBlocks->sversion = htonl(pMeterMeta->sversion); - pBlocks->numOfRows = htons(numOfRows); +static void tsSetBlockInfo(SShellSubmitBlock *pBlocks, const SMeterMeta *pMeterMeta, int32_t numOfRows) { + pBlocks->sid = pMeterMeta->sid; + pBlocks->uid = pMeterMeta->uid; + pBlocks->sversion = pMeterMeta->sversion; + pBlocks->numOfRows += numOfRows; } -static int32_t doParseInsertStatement(SSqlCmd* pCmd, SSqlRes* pRes, void* pDataBlockHashList, char** str, - SParsedDataColInfo* spd) { - SMeterMeta* pMeterMeta = pCmd->pMeterMeta; - int32_t numOfRows = 0; +int32_t sortRemoveDuplicates(STableDataBlocks *dataBuf, int32_t numOfRows) { + // data block is disordered, sort it in ascending order + if (!dataBuf->ordered) { + char *pBlockData = dataBuf->pData + sizeof(SShellSubmitBlock); + qsort(pBlockData, numOfRows, dataBuf->rowSize, rowDataCompar); - SInsertedDataBlocks** pData = (SInsertedDataBlocks**)taosGetIntHashData(pDataBlockHashList, pMeterMeta->vgid); - SInsertedDataBlocks* dataBuf = NULL; + int32_t i = 0; + int32_t j = 1; - /* no data in hash list */ - if (pData == NULL) { - dataBuf = tscCreateDataBlock(TSDB_PAYLOAD_SIZE); + while (j < numOfRows) { + TSKEY ti = *(TSKEY *)(pBlockData + dataBuf->rowSize * i); + TSKEY tj = *(TSKEY *)(pBlockData + dataBuf->rowSize * j); - /* here we only keep the pointer of chunk of buffer, not the whole buffer */ - dataBuf = *(SInsertedDataBlocks**)taosAddIntHash(pDataBlockHashList, pCmd->pMeterMeta->vgid, (char*)&dataBuf); + if (ti == tj) { + ++j; + continue; + } - dataBuf->size = tsInsertHeadSize; - strncpy(dataBuf->meterId, pCmd->name, tListLen(pCmd->name)); - appendDataBlock(pCmd->pDataBlocks, dataBuf); - } else { - dataBuf = *pData; + int32_t nextPos = (++i); + if (nextPos != j) { + memmove(pBlockData + dataBuf->rowSize * nextPos, pBlockData + dataBuf->rowSize * j, dataBuf->rowSize); + } + + ++j; + } + + numOfRows = i + 1; + dataBuf->ordered = true; } + return numOfRows; +} + +static int32_t doParseInsertStatement(SSqlObj *pSql, void *pTableHashList, char **str, SParsedDataColInfo *spd, + int32_t *totalNum) { + SSqlCmd * pCmd = &pSql->cmd; + SMeterMeta *pMeterMeta = pCmd->pMeterMeta; + + STableDataBlocks *dataBuf = + tscGetDataBlockFromList(pTableHashList, pCmd->pDataBlocks, pMeterMeta->uid, TSDB_DEFAULT_PAYLOAD_SIZE, + sizeof(SShellSubmitBlock), pMeterMeta->rowSize, pCmd->name); + int32_t maxNumOfRows = tscAllocateMemIfNeed(dataBuf, pMeterMeta->rowSize); - int64_t startPos = dataBuf->size; - numOfRows = tsParseValues(str, dataBuf, pMeterMeta, maxNumOfRows, spd, pCmd->payload); + int32_t numOfRows = tsParseValues(str, dataBuf, pMeterMeta, maxNumOfRows, spd, pCmd->payload); if (numOfRows <= 0) { return TSDB_CODE_INVALID_SQL; } - // data block is disordered, sort it in ascending order - if (!spd->ordered) { - char* pBlockData = dataBuf->pData + startPos + sizeof(SShellSubmitBlock); - qsort(pBlockData, numOfRows, pMeterMeta->rowSize, rowDataCompar); - spd->ordered = true; - } - - SShellSubmitBlock* pBlocks = (SShellSubmitBlock*)(dataBuf->pData + startPos); + SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(dataBuf->pData); tsSetBlockInfo(pBlocks, pMeterMeta, numOfRows); - dataBuf->numOfMeters += 1; + + dataBuf->vgid = pMeterMeta->vgid; + dataBuf->numOfMeters = 1; /* - * the value of pRes->numOfRows does not affect the true result of AFFECTED ROWS, which is - * actually returned from server. - * - * NOTE: - * The better way is to use a local variable to store the number of rows that - * has been extracted from sql expression string, and avoid to do the invalid write check + * the value of pRes->numOfRows does not affect the true result of AFFECTED ROWS, + * which is actually returned from server. */ - pRes->numOfRows += numOfRows; + *totalNum += numOfRows; return TSDB_CODE_SUCCESS; } -static int32_t tscParseSqlForCreateTableOnDemand(char** sqlstr, SSqlObj* pSql) { - char* id = NULL; +static int32_t tscParseSqlForCreateTableOnDemand(char **sqlstr, SSqlObj *pSql) { + char * id = NULL; int32_t idlen = 0; int32_t code = TSDB_CODE_SUCCESS; - SSqlCmd* pCmd = &pSql->cmd; - char* sql = *sqlstr; + SSqlCmd *pCmd = &pSql->cmd; + char * sql = *sqlstr; sql = tscGetToken(sql, &id, &idlen); /* build the token of specified table */ SSQLToken tableToken = {.z = id, .n = idlen, .type = TK_ID}; - char* cstart = NULL; - char* cend = NULL; + char *cstart = NULL; + char *cend = NULL; /* skip possibly exists column list */ sql = tscGetToken(sql, &id, &idlen); int32_t numOfColList = 0; - bool createTable = false; + bool createTable = false; if (id[0] == '(' && idlen == 1) { cstart = &id[0]; @@ -630,7 +648,7 @@ static int32_t tscParseSqlForCreateTableOnDemand(char** sqlstr, SSqlObj* pSql) { if (strncmp(id, "using", idlen) == 0 && idlen == 5) { /* create table if not exists */ sql = tscGetToken(sql, &id, &idlen); - STagData* pTag = (STagData*)pCmd->payload; + STagData *pTag = (STagData *)pCmd->payload; memset(pTag, 0, sizeof(STagData)); SSQLToken token1 = {idlen, TK_ID, id}; @@ -643,8 +661,14 @@ static int32_t tscParseSqlForCreateTableOnDemand(char** sqlstr, SSqlObj* pSql) { return code; } - char* tagVal = pTag->data; - SSchema* pTagSchema = tsGetTagSchema(pCmd->pMeterMeta); + if (!UTIL_METER_IS_METRIC(pCmd)) { + const char* msg = "create table only from super table is allowed"; + sprintf(pCmd->payload, "%s", msg); + return TSDB_CODE_INVALID_SQL; + } + + char * tagVal = pTag->data; + SSchema *pTagSchema = tsGetTagSchema(pCmd->pMeterMeta); sql = tscGetToken(sql, &id, &idlen); if (!(strncmp(id, "tags", idlen) == 0 && idlen == 4)) { @@ -722,11 +746,11 @@ static int32_t tscParseSqlForCreateTableOnDemand(char** sqlstr, SSqlObj* pSql) { return code; } -int validateTableName(char* tblName, int len) { +int validateTableName(char *tblName, int len) { char buf[TSDB_METER_ID_LEN] = {0}; - memcpy(buf, tblName, len); + strncpy(buf, tblName, len); - SSQLToken token = {len, TK_ID, buf}; + SSQLToken token = {.n = len, .type = TK_ID, .z = buf}; tSQLGetToken(buf, &token.type); return tscValidateName(&token); @@ -742,18 +766,21 @@ int validateTableName(char* tblName, int len) { * @param pSql * @return */ -int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlObj* pSql) { +int tsParseInsertStatement(SSqlObj *pSql, char *str, char *acct, char *db) { + SSqlCmd *pCmd = &pSql->cmd; + pCmd->command = TSDB_SQL_INSERT; pCmd->isInsertFromFile = -1; pCmd->count = 0; pSql->res.numOfRows = 0; + int32_t totalNum = 0; if (!pSql->pTscObj->writeAuth) { return TSDB_CODE_NO_RIGHTS; } - char* id; + char *id; int idlen; int code = TSDB_CODE_INVALID_SQL; @@ -766,15 +793,21 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO return code; } - void* pDataBlockHashList = taosInitIntHash(4, POINTER_BYTES, taosHashInt); + void *pTableHashList = taosInitIntHash(128, sizeof(void *), taosHashInt); pSql->cmd.pDataBlocks = tscCreateBlockArrayList(); tscTrace("%p create data block list for submit data, %p", pSql, pSql->cmd.pDataBlocks); while (1) { tscGetToken(str, &id, &idlen); + if (idlen == 0) { - if ((pSql->res.numOfRows > 0) || (1 == pCmd->isInsertFromFile)) { + // parse file, do not release the STableDataBlock + if (pCmd->isInsertFromFile == 1) { + goto _clean; + } + + if (totalNum > 0) { break; } else { // no data in current sql string, error code = TSDB_CODE_INVALID_SQL; @@ -782,10 +815,7 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO } } - /* - * Check the validity of the table name - * - */ + // Check if the table name available or not if (validateTableName(id, idlen) != TSDB_CODE_SUCCESS) { code = TSDB_CODE_INVALID_SQL; sprintf(pCmd->payload, "table name is invalid"); @@ -797,7 +827,7 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO goto _error_clean; } - void* fp = pSql->fp; + void *fp = pSql->fp; if ((code = tscParseSqlForCreateTableOnDemand(&str, pSql)) != TSDB_CODE_SUCCESS) { if (fp != NULL) { goto _clean; @@ -826,7 +856,7 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO if (strncmp(id, "values", 6) == 0 && idlen == 6) { SParsedDataColInfo spd = {0}; - SSchema* pSchema = tsGetSchema(pCmd->pMeterMeta); + SSchema * pSchema = tsGetSchema(pCmd->pMeterMeta); tscSetAssignedColumnInfo(&spd, pSchema, pCmd->pMeterMeta->numOfColumns); @@ -844,7 +874,7 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO * app here insert data in different vnodes, so we need to set the following * data in another submit procedure using async insert routines */ - code = doParseInsertStatement(pCmd, &pSql->res, pDataBlockHashList, &str, &spd); + code = doParseInsertStatement(pSql, pTableHashList, &str, &spd, &totalNum); if (code != TSDB_CODE_SUCCESS) { goto _error_clean; } @@ -867,34 +897,29 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO goto _error_clean; } - char* fname = calloc(1, idlen + 1); - memcpy(fname, id, idlen); + char fname[PATH_MAX] = {0}; + strncpy(fname, id, idlen); + strdequote(fname); wordexp_t full_path; if (wordexp(fname, &full_path, 0) != 0) { code = TSDB_CODE_INVALID_SQL; sprintf(pCmd->payload, "invalid filename"); - free(fname); goto _error_clean; } - strcpy(fname, full_path.we_wordv[0]); wordfree(&full_path); - SInsertedDataBlocks* dataBuf = tscCreateDataBlock(strlen(fname) + sizeof(SInsertedDataBlocks) + 1); - strcpy(dataBuf->filename, fname); - - dataBuf->size = strlen(fname) + 1; - free(fname); - - strcpy(dataBuf->meterId, pCmd->name); - appendDataBlock(pCmd->pDataBlocks, dataBuf); + STableDataBlocks* pDataBlock = tscCreateDataBlockEx(PATH_MAX, pCmd->pMeterMeta->rowSize, sizeof(SShellSubmitBlock), + pCmd->name); + tscAppendDataBlock(pCmd->pDataBlocks, pDataBlock); + strcpy(pDataBlock->filename, fname); str = id + idlen; } else if (idlen == 1 && id[0] == '(') { /* insert into tablename(col1, col2,..., coln) values(v1, v2,... vn); */ - SMeterMeta* pMeterMeta = pCmd->pMeterMeta; - SSchema* pSchema = tsGetSchema(pMeterMeta); + SMeterMeta *pMeterMeta = pCmd->pMeterMeta; + SSchema * pSchema = tsGetSchema(pMeterMeta); if (pCmd->isInsertFromFile == -1) { pCmd->isInsertFromFile = 0; @@ -905,8 +930,6 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO } SParsedDataColInfo spd = {0}; - spd.ordered = true; - spd.prevTimestamp = INT64_MIN; spd.numOfCols = pMeterMeta->numOfColumns; int16_t offset[TSDB_MAX_COLUMNS] = {0}; @@ -925,7 +948,7 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO // todo speedup by using hash list for (int32_t t = 0; t < pMeterMeta->numOfColumns; ++t) { if (strncmp(id, pSchema[t].name, idlen) == 0 && strlen(pSchema[t].name) == idlen) { - SParsedColElem* pElem = &spd.elems[spd.numOfAssignedCols++]; + SParsedColElem *pElem = &spd.elems[spd.numOfAssignedCols++]; pElem->offset = offset[t]; pElem->colIndex = t; @@ -961,7 +984,7 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO goto _error_clean; } - code = doParseInsertStatement(pCmd, &pSql->res, pDataBlockHashList, &str, &spd); + code = doParseInsertStatement(pSql, pTableHashList, &str, &spd, &totalNum); if (code != TSDB_CODE_SUCCESS) { goto _error_clean; } @@ -972,53 +995,51 @@ int tsParseInsertStatement(SSqlCmd* pCmd, char* str, char* acct, char* db, SSqlO } } - /* submit to more than one vnode */ + // submit to more than one vnode if (pCmd->pDataBlocks->nSize > 0) { - // lihui: if import file, only malloc the size of file name - if (1 != pCmd->isInsertFromFile) { - tscFreeUnusedDataBlocks(pCmd->pDataBlocks); + // merge according to vgid + tscMergeTableDataBlocks(pCmd, pCmd->pDataBlocks); - SInsertedDataBlocks* pDataBlock = pCmd->pDataBlocks->pData[0]; - if ((code = tscCopyDataBlockToPayload(pSql, pDataBlock)) != TSDB_CODE_SUCCESS) { - goto _error_clean; - } + STableDataBlocks *pDataBlock = pCmd->pDataBlocks->pData[0]; + if ((code = tscCopyDataBlockToPayload(pSql, pDataBlock)) != TSDB_CODE_SUCCESS) { + goto _error_clean; } pCmd->vnodeIdx = 1; // set the next sent data vnode index in data block arraylist } else { - tscDestroyBlockArrayList(&pCmd->pDataBlocks); + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); } code = TSDB_CODE_SUCCESS; goto _clean; _error_clean: - tscDestroyBlockArrayList(&pCmd->pDataBlocks); + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); _clean: - taosCleanUpIntHash(pDataBlockHashList); + taosCleanUpIntHash(pTableHashList); return code; } -int tsParseImportStatement(SSqlObj* pSql, char* str, char* acct, char* db) { - SSqlCmd* pCmd = &pSql->cmd; +int tsParseImportStatement(SSqlObj *pSql, char *str, char *acct, char *db) { + SSqlCmd *pCmd = &pSql->cmd; pCmd->order.order = TSQL_SO_ASC; - return tsParseInsertStatement(pCmd, str, acct, db, pSql); + return tsParseInsertStatement(pSql, str, acct, db); } -int tsParseInsertSql(SSqlObj* pSql, char* sql, char* acct, char* db) { - char* verb; +int tsParseInsertSql(SSqlObj *pSql, char *sql, char *acct, char *db) { + char *verb; int verblen; int code = TSDB_CODE_INVALID_SQL; - SSqlCmd* pCmd = &pSql->cmd; + SSqlCmd *pCmd = &pSql->cmd; tscCleanSqlCmd(pCmd); sql = tscGetToken(sql, &verb, &verblen); if (verblen) { if (strncmp(verb, "insert", 6) == 0 && verblen == 6) { - code = tsParseInsertStatement(pCmd, sql, acct, db, pSql); + code = tsParseInsertStatement(pSql, sql, acct, db); } else if (strncmp(verb, "import", 6) == 0 && verblen == 6) { code = tsParseImportStatement(pSql, sql, acct, db); } else { @@ -1029,27 +1050,23 @@ int tsParseInsertSql(SSqlObj* pSql, char* sql, char* acct, char* db) { sprintf(pCmd->payload, "no any keywords"); } - // sql object has not been released in async model - if (pSql->signature == pSql) { - pSql->res.numOfRows = 0; - } - return code; } -int tsParseSql(SSqlObj* pSql, char* acct, char* db, bool multiVnodeInsertion) { +int tsParseSql(SSqlObj *pSql, char *acct, char *db, bool multiVnodeInsertion) { int32_t ret = TSDB_CODE_SUCCESS; if (tscIsInsertOrImportData(pSql->sqlstr)) { /* - * only for async multi-vnode insertion Set the fp before parse the sql string, in case of getmetermeta failed, - * in which the error handle callback function can rightfully restore the user defined function (fp) + * only for async multi-vnode insertion + * Set the fp before parse the sql string, in case of getmetermeta failed, in which + * the error handle callback function can rightfully restore the user defined function (fp) */ if (pSql->fp != NULL && multiVnodeInsertion) { assert(pSql->fetchFp == NULL); pSql->fetchFp = pSql->fp; - /* replace user defined callback function with multi-insert proxy function*/ + /* replace user defined callback function with multi-insert proxy function */ pSql->fp = tscAsyncInsertMultiVnodesProxy; } @@ -1072,31 +1089,55 @@ int tsParseSql(SSqlObj* pSql, char* acct, char* db, bool multiVnodeInsertion) { return ret; } -static int tscInsertDataFromFile(SSqlObj* pSql, FILE* fp) { - // TODO : import data from file - int readLen = 0; - char* line = NULL; - size_t n = 0; - int len = 0; - uint32_t maxRows = 0; - SSqlCmd* pCmd = &pSql->cmd; - char* pStart = pCmd->payload + tsInsertHeadSize; - SMeterMeta* pMeterMeta = pCmd->pMeterMeta; - int numOfRows = 0; - uint32_t rowSize = pMeterMeta->rowSize; - char error[128] = "\0"; - SShellSubmitBlock* pBlock = (SShellSubmitBlock*)(pStart); - pStart += sizeof(SShellSubmitBlock); - int nrows = 0; - - const int32_t RESERVED_SIZE = 1024; - - maxRows = (TSDB_PAYLOAD_SIZE - RESERVED_SIZE - sizeof(SShellSubmitBlock)) / rowSize; +static int doPackSendDataBlock(SSqlObj* pSql, int32_t numOfRows, STableDataBlocks* pTableDataBlocks) { + int32_t code = TSDB_CODE_SUCCESS; + SSqlCmd* pCmd = &pSql->cmd; + + SMeterMeta* pMeterMeta = pCmd->pMeterMeta; + + SShellSubmitBlock *pBlocks = (SShellSubmitBlock *)(pTableDataBlocks->pData); + tsSetBlockInfo(pBlocks, pMeterMeta, numOfRows); + + tscMergeTableDataBlocks(pCmd, pCmd->pDataBlocks); + + // the pDataBlock is different from the pTableDataBlocks + STableDataBlocks *pDataBlock = pCmd->pDataBlocks->pData[0]; + if ((code = tscCopyDataBlockToPayload(pSql, pDataBlock)) != TSDB_CODE_SUCCESS) { + return code; + } + + if ((code = tscProcessSql(pSql)) != TSDB_CODE_SUCCESS) { + return code; + } + + return TSDB_CODE_SUCCESS; +} + +static int tscInsertDataFromFile(SSqlObj *pSql, FILE *fp) { + int readLen = 0; + char * line = NULL; + size_t n = 0; + int len = 0; + uint32_t maxRows = 0; + SSqlCmd * pCmd = &pSql->cmd; + SMeterMeta *pMeterMeta = pCmd->pMeterMeta; + int numOfRows = 0; + int32_t rowSize = pMeterMeta->rowSize; + int32_t code = 0; + int nrows = 0; + + pCmd->pDataBlocks = tscCreateBlockArrayList(); + STableDataBlocks* pTableDataBlock = tscCreateDataBlockEx(TSDB_PAYLOAD_SIZE, pMeterMeta->rowSize, + sizeof(SShellSubmitBlock), pCmd->name); + + tscAppendDataBlock(pCmd->pDataBlocks, pTableDataBlock); + + maxRows = tscAllocateMemIfNeed(pTableDataBlock, rowSize); if (maxRows < 1) return -1; int count = 0; - SParsedDataColInfo spd = {0}; - SSchema* pSchema = tsGetSchema(pCmd->pMeterMeta); + SParsedDataColInfo spd = {.numOfCols = pCmd->pMeterMeta->numOfColumns}; + SSchema * pSchema = tsGetSchema(pCmd->pMeterMeta); tscSetAssignedColumnInfo(&spd, pSchema, pCmd->pMeterMeta->numOfColumns); @@ -1105,43 +1146,42 @@ static int tscInsertDataFromFile(SSqlObj* pSql, FILE* fp) { if (('\r' == line[readLen - 1]) || ('\n' == line[readLen - 1])) line[--readLen] = 0; if (readLen <= 0) continue; - char* lineptr = line; + char *lineptr = line; strtolower(line, line); - len = tsParseOneRowData(&lineptr, pStart, pSchema, &spd, error, pCmd->pMeterMeta->precision); + + len = tsParseOneRowData(&lineptr, pTableDataBlock, pSchema, &spd, pCmd->payload, pMeterMeta->precision); if (len <= 0) return -1; - pStart += len; + + pTableDataBlock->size += len; count++; nrows++; if (count >= maxRows) { - pCmd->payloadLen = (pStart - pCmd->payload); - pBlock->sid = htonl(pMeterMeta->sid); - pBlock->numOfRows = htons(count); - pSql->res.numOfRows = 0; - if (tscProcessSql(pSql) != 0) { - return -1; + if ((code = doPackSendDataBlock(pSql, count, pTableDataBlock)) != TSDB_CODE_SUCCESS) { + return -code; } + + pTableDataBlock = pCmd->pDataBlocks->pData[0]; + pTableDataBlock->size = sizeof(SShellSubmitBlock); + pTableDataBlock->rowSize = pMeterMeta->rowSize; + numOfRows += pSql->res.numOfRows; + pSql->res.numOfRows = 0; count = 0; - memset(pCmd->payload, 0, TSDB_PAYLOAD_SIZE); - pStart = pCmd->payload + tsInsertHeadSize; - pBlock = (SShellSubmitBlock*)(pStart); - pStart += sizeof(SShellSubmitBlock); } } if (count > 0) { - pCmd->payloadLen = (pStart - pCmd->payload); - pBlock->sid = htonl(pMeterMeta->sid); - pBlock->numOfRows = htons(count); - pSql->res.numOfRows = 0; - if (tscProcessSql(pSql) != 0) { - return -1; + if ((code = doPackSendDataBlock(pSql, count, pTableDataBlock)) != TSDB_CODE_SUCCESS) { + return -code; } + numOfRows += pSql->res.numOfRows; + pSql->res.numOfRows = 0; } if (line) tfree(line); + return numOfRows; } @@ -1151,20 +1191,21 @@ static int tscInsertDataFromFile(SSqlObj* pSql, FILE* fp) { * 2019.05.10 lihui * Remove the code for importing records from files */ -void tscProcessMultiVnodesInsert(SSqlObj* pSql) { - SSqlCmd* pCmd = &pSql->cmd; +void tscProcessMultiVnodesInsert(SSqlObj *pSql) { + SSqlCmd *pCmd = &pSql->cmd; if (pCmd->command != TSDB_SQL_INSERT) { return; } - SInsertedDataBlocks* pDataBlock = NULL; - int32_t code = TSDB_CODE_SUCCESS; + STableDataBlocks *pDataBlock = NULL; + int32_t affected_rows = 0; + int32_t code = TSDB_CODE_SUCCESS; /* the first block has been sent to server in processSQL function */ assert(pCmd->isInsertFromFile != -1 && pCmd->vnodeIdx >= 1 && pCmd->pDataBlocks != NULL); if (pCmd->vnodeIdx < pCmd->pDataBlocks->nSize) { - SDataBlockList* pDataBlocks = pCmd->pDataBlocks; + SDataBlockList *pDataBlocks = pCmd->pDataBlocks; for (int32_t i = pCmd->vnodeIdx; i < pDataBlocks->nSize; ++i) { pDataBlock = pDataBlocks->pData[i]; @@ -1182,59 +1223,70 @@ void tscProcessMultiVnodesInsert(SSqlObj* pSql) { } // all data have been submit to vnode, release data blocks - tscDestroyBlockArrayList(&pCmd->pDataBlocks); + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); } /* multi-vnodes insertion in sync query model */ -void tscProcessMultiVnodesInsertForFile(SSqlObj* pSql) { - SSqlCmd* pCmd = &pSql->cmd; +void tscProcessMultiVnodesInsertForFile(SSqlObj *pSql) { + SSqlCmd *pCmd = &pSql->cmd; if (pCmd->command != TSDB_SQL_INSERT) { return; } - SInsertedDataBlocks* pDataBlock = NULL; - int32_t affected_rows = 0; - - assert(pCmd->isInsertFromFile == 1 && pCmd->vnodeIdx >= 1 && pCmd->pDataBlocks != NULL); + STableDataBlocks *pDataBlock = NULL; + int32_t affected_rows = 0; - SDataBlockList* pDataBlocks = pCmd->pDataBlocks; + assert(pCmd->isInsertFromFile == 1 && pCmd->pDataBlocks != NULL); + SDataBlockList *pDataBlockList = pCmd->pDataBlocks; + pCmd->pDataBlocks = NULL; - pCmd->isInsertFromFile = 0; // for tscProcessSql() + char path[PATH_MAX] = {0}; - pSql->res.numOfRows = 0; - for (int32_t i = 0; i < pDataBlocks->nSize; ++i) { - pDataBlock = pDataBlocks->pData[i]; + for (int32_t i = 0; i < pDataBlockList->nSize; ++i) { + pDataBlock = pDataBlockList->pData[i]; if (pDataBlock == NULL) { continue; } tscAllocPayloadWithSize(pCmd, TSDB_PAYLOAD_SIZE); - pCmd->count = 1; - FILE* fp = fopen(pDataBlock->filename, "r"); + strncpy(path, pDataBlock->filename, PATH_MAX); + + FILE *fp = fopen(path, "r"); if (fp == NULL) { - tscError("%p Failed to open file %s to insert data from file", pSql, pDataBlock->filename); + tscError("%p failed to open file %s to load data from file, reason:%s", pSql, path, + strerror(errno)); continue; } strcpy(pCmd->name, pDataBlock->meterId); - tscGetMeterMeta(pSql, pCmd->name); + memset(pDataBlock->pData, 0, pDataBlock->nAllocSize); + + int32_t ret = tscGetMeterMeta(pSql, pCmd->name); + if (ret != TSDB_CODE_SUCCESS) { + tscError("%p get meter meta failed, abort", pSql); + continue; + } + int nrows = tscInsertDataFromFile(pSql, fp); + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); + if (nrows < 0) { fclose(fp); - tscTrace("%p There is no record in file %s", pSql, pDataBlock->filename); + tscTrace("%p no records in file %s", pSql, path); continue; } - fclose(fp); + fclose(fp); affected_rows += nrows; - tscTrace("%p Insert data %d records from file %s", pSql, nrows, pDataBlock->filename); + tscTrace("%p Insert data %d records from file %s", pSql, nrows, path); } pSql->res.numOfRows = affected_rows; // all data have been submit to vnode, release data blocks - tscDestroyBlockArrayList(&pCmd->pDataBlocks); + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); + tscDestroyBlockArrayList(pDataBlockList); } diff --git a/src/client/src/tscSQLParserImpl.c b/src/client/src/tscSQLParserImpl.c index ebb6fecbd2dad692e61284cad018d706823ee48f..a0d2f09ae71952847cfe0b762be4336796bad573 100644 --- a/src/client/src/tscSQLParserImpl.c +++ b/src/client/src/tscSQLParserImpl.c @@ -140,12 +140,10 @@ tSQLExpr *tSQLExprIdValueCreate(SSQLToken *pToken, int32_t optrType) { nodePtr->val.nType = TSDB_DATA_TYPE_BIGINT; nodePtr->nSQLOptr = TK_TIMESTAMP; } else { // must be field id if not numbers - if (pToken != NULL) { - assert(optrType == TK_ID); - /* it must be the column name (tk_id) */ + assert(optrType == TK_ALL || optrType == TK_ID); + + if (pToken != NULL) { // it must be the column name (tk_id) nodePtr->colInfo = *pToken; - } else { - assert(optrType == TK_ALL); } nodePtr->nSQLOptr = optrType; diff --git a/src/client/src/tscSecondaryMerge.c b/src/client/src/tscSecondaryMerge.c index b912264c4edc061e63ab5c62c6bf9bda20a4b1f0..d4745f652508336345121a87c633f03f5623217d 100644 --- a/src/client/src/tscSecondaryMerge.c +++ b/src/client/src/tscSecondaryMerge.c @@ -19,25 +19,26 @@ #include #include "tlosertree.h" -#include "tsclient.h" +#include "tlosertree.h" #include "tscSecondaryMerge.h" #include "tscUtil.h" +#include "tsclient.h" #include "tutil.h" typedef struct SCompareParam { - SLocalDataSrc ** pLocalData; - tOrderDescriptor *pDesc; - int32_t numOfElems; - int32_t groupOrderType; + SLocalDataSource **pLocalData; + tOrderDescriptor * pDesc; + int32_t numOfElems; + int32_t groupOrderType; } SCompareParam; int32_t treeComparator(const void *pLeft, const void *pRight, void *param) { int32_t pLeftIdx = *(int32_t *)pLeft; int32_t pRightIdx = *(int32_t *)pRight; - SCompareParam * pParam = (SCompareParam *)param; - tOrderDescriptor *pDesc = pParam->pDesc; - SLocalDataSrc ** pLocalData = pParam->pLocalData; + SCompareParam * pParam = (SCompareParam *)param; + tOrderDescriptor * pDesc = pParam->pDesc; + SLocalDataSource **pLocalData = pParam->pLocalData; /* this input is exhausted, set the special value to denote this */ if (pLocalData[pLeftIdx]->rowIdx == -1) { @@ -105,7 +106,7 @@ static void tscInitSqlContext(SSqlCmd *pCmd, SSqlRes *pRes, SLocalReducer *pRedu } /* - * todo error process with async process + * todo release allocated memory process with async process */ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrderDescriptor *pDesc, tColModel *finalmodel, SSqlCmd *pCmd, SSqlRes *pRes) { @@ -133,32 +134,32 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd if (numOfFlush == 0 || numOfBuffer == 0) { tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); - tscTrace("%p retrieved no data", pSqlObjAddr); + return; } if (pDesc->pSchema->maxCapacity >= pMemBuffer[0]->nPageSize) { - tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); - tscError("%p Invalid value of buffer capacity %d and page size %d ", pSqlObjAddr, pDesc->pSchema->maxCapacity, pMemBuffer[0]->nPageSize); + + tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); pRes->code = TSDB_CODE_APP_ERROR; return; } - size_t nReducerSize = sizeof(SLocalReducer) + POINTER_BYTES * numOfFlush; + size_t nReducerSize = sizeof(SLocalReducer) + sizeof(void *) * numOfFlush; SLocalReducer *pReducer = (SLocalReducer *)calloc(1, nReducerSize); if (pReducer == NULL) { - tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); - tscError("%p failed to create merge structure", pSqlObjAddr); + + tscLocalReducerEnvDestroy(pMemBuffer, pDesc, finalmodel, numOfBuffer); pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; return; } pReducer->pExtMemBuffer = pMemBuffer; - pReducer->pLocalDataSrc = (SLocalDataSrc **)&pReducer[1]; + pReducer->pLocalDataSrc = (SLocalDataSource **)&pReducer[1]; assert(pReducer->pLocalDataSrc != NULL); pReducer->numOfBuffer = numOfFlush; @@ -172,7 +173,7 @@ void tscCreateLocalReducer(tExtMemBuffer **pMemBuffer, int32_t numOfBuffer, tOrd int32_t numOfFlushoutInFile = pMemBuffer[i]->fileMeta.flushoutData.nLength; for (int32_t j = 0; j < numOfFlushoutInFile; ++j) { - SLocalDataSrc *pDS = (SLocalDataSrc *)malloc(sizeof(SLocalDataSrc) + pMemBuffer[0]->nPageSize); + SLocalDataSource *pDS = (SLocalDataSource *)malloc(sizeof(SLocalDataSource) + pMemBuffer[0]->nPageSize); if (pDS == NULL) { tscError("%p failed to create merge structure", pSqlObjAddr); pRes->code = TSDB_CODE_CLI_OUT_OF_MEMORY; @@ -468,9 +469,7 @@ static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCm } if (pCmd->nAggTimeInterval != 0) { - /* - * the first column is the timestamp, handles queries like "interval(10m) group by tags" - */ + //the first column is the timestamp, handles queries like "interval(10m) group by tags" orderIdx[numOfGroupByCols - 1] = PRIMARYKEY_TIMESTAMP_COL_INDEX; } } @@ -485,29 +484,32 @@ static int32_t createOrderDescriptor(tOrderDescriptor **pOrderDesc, SSqlCmd *pCm } } -bool isSameGroupOfPrev(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage *tmpPage) { +bool isSameGroup(SSqlCmd *pCmd, SLocalReducer *pReducer, char *pPrev, tFilePage *tmpBuffer) { int16_t functionId = tscSqlExprGet(pCmd, 0)->sqlFuncId; - if (functionId == TSDB_FUNC_PRJ || functionId == TSDB_FUNC_ARITHM) { // column projection query - return false; // disable merge procedure + + // disable merge procedure for column projection query + if (functionId == TSDB_FUNC_PRJ || functionId == TSDB_FUNC_ARITHM) { + return false; } tOrderDescriptor *pOrderDesc = pReducer->pDesc; int32_t numOfCols = pOrderDesc->orderIdx.numOfOrderedCols; - if (numOfCols > 0) { - if (pOrderDesc->orderIdx.pData[numOfCols - 1] == PRIMARYKEY_TIMESTAMP_COL_INDEX) { //<= 0 - /* metric interval query */ - assert(pCmd->nAggTimeInterval > 0); - pOrderDesc->orderIdx.numOfOrderedCols -= 1; - } else { /* simple group by query */ - assert(pCmd->nAggTimeInterval == 0); - } - } else { + // no group by columns, all data belongs to one group + if (numOfCols <= 0) { return true; } + if (pOrderDesc->orderIdx.pData[numOfCols - 1] == PRIMARYKEY_TIMESTAMP_COL_INDEX) { //<= 0 + // super table interval query + assert(pCmd->nAggTimeInterval > 0); + pOrderDesc->orderIdx.numOfOrderedCols -= 1; + } else { // simple group by query + assert(pCmd->nAggTimeInterval == 0); + } + // only one row exists - int32_t ret = compare_a(pOrderDesc, 1, 0, pPrev, 1, 0, tmpPage->data); + int32_t ret = compare_a(pOrderDesc, 1, 0, pPrev, 1, 0, tmpBuffer->data); pOrderDesc->orderIdx.numOfOrderedCols = numOfCols; return (ret == 0); @@ -602,7 +604,7 @@ void tscLocalReducerEnvDestroy(tExtMemBuffer **pMemBuffer, tOrderDescriptor *pDe * @param treeList * @return the number of remain input source. if ret == 0, all data has been handled */ -int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSrc *pOneInterDataSrc, +int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc, bool *needAdjustLoserTree) { pOneInterDataSrc->rowIdx = 0; pOneInterDataSrc->pageId += 1; @@ -629,8 +631,8 @@ int32_t loadNewDataFromDiskFor(SLocalReducer *pLocalReducer, SLocalDataSrc *pOne return pLocalReducer->numOfBuffer; } -void loadDataIntoMemAndAdjustLoserTree(SLocalReducer *pLocalReducer, SLocalDataSrc *pOneInterDataSrc, - SLoserTreeInfo *pTree) { +void adjustLoserTreeFromNewData(SLocalReducer *pLocalReducer, SLocalDataSource *pOneInterDataSrc, + SLoserTreeInfo *pTree) { /* * load a new data page into memory for intermediate dataset source, * since it's last record in buffer has been chosen to be processed, as the winner of loser-tree @@ -662,10 +664,10 @@ void loadDataIntoMemAndAdjustLoserTree(SLocalReducer *pLocalReducer, SLocalDataS } } -void savePrevRecordAndSetupInterpoInfo(SLocalReducer *pLocalReducer, SSqlCmd *pCmd, - SInterpolationInfo *pInterpoInfo) { // discard following dataset in the - // same group and reset the - // interpolation information +void savePrevRecordAndSetupInterpoInfo( + SLocalReducer *pLocalReducer, SSqlCmd *pCmd, + SInterpolationInfo + *pInterpoInfo) { // discard following dataset in the same group and reset the interpolation information int64_t stime = (pCmd->stime < pCmd->etime) ? pCmd->stime : pCmd->etime; int64_t revisedSTime = taosGetIntervalStartTimestamp(stime, pCmd->nAggTimeInterval, pCmd->intervalTimeUnit); @@ -749,7 +751,7 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo tColModelErase(pLocalReducer->resColModel, pFinalDataPage, prevSize, 0, pCmd->limit.offset - 1); /* remove the hole in column model */ - tColModelCompress(pLocalReducer->resColModel, pFinalDataPage, prevSize); + tColModelCompact(pLocalReducer->resColModel, pFinalDataPage, prevSize); pRes->numOfRows -= pCmd->limit.offset; pRes->numOfTotal -= pCmd->limit.offset; @@ -772,7 +774,7 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo pRes->numOfRows -= overFlow; pFinalDataPage->numOfElems -= overFlow; - tColModelCompress(pLocalReducer->resColModel, pFinalDataPage, prevSize); + tColModelCompact(pLocalReducer->resColModel, pFinalDataPage, prevSize); /* set remain data to be discarded, and reset the interpolation information */ savePrevRecordAndSetupInterpoInfo(pLocalReducer, pCmd, &pLocalReducer->interpolationInfo); @@ -892,21 +894,21 @@ static void doInterpolateResult(SSqlObj *pSql, SLocalReducer *pLocalReducer, boo free(srcData); } -static void savePrevRecord(SLocalReducer *pLocalReducer, tFilePage *tmpPages) { +static void savePreviousRow(SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) { tColModel *pColModel = pLocalReducer->pDesc->pSchema; - assert(pColModel->maxCapacity == 1 && tmpPages->numOfElems == 1); + assert(pColModel->maxCapacity == 1 && tmpBuffer->numOfElems == 1); // copy to previous temp buffer for (int32_t i = 0; i < pLocalReducer->pDesc->pSchema->numOfCols; ++i) { - memcpy(pLocalReducer->prevRowOfInput + pColModel->colOffset[i], tmpPages->data + pColModel->colOffset[i], + memcpy(pLocalReducer->prevRowOfInput + pColModel->colOffset[i], tmpBuffer->data + pColModel->colOffset[i], pColModel->pFields[i].bytes); } - tmpPages->numOfElems = 0; + tmpBuffer->numOfElems = 0; pLocalReducer->hasPrevRow = true; } -static void handleUnprocessedRow(SLocalReducer *pLocalReducer, SSqlCmd *pCmd, tFilePage *tmpPages) { +static void handleUnprocessedRow(SLocalReducer *pLocalReducer, SSqlCmd *pCmd, tFilePage *tmpBuffer) { if (pLocalReducer->hasUnprocessedRow) { for (int32_t j = 0; j < pCmd->fieldsInfo.numOfOutputCols; ++j) { SSqlExpr *pExpr = tscSqlExprGet(pCmd, j); @@ -922,7 +924,7 @@ static void handleUnprocessedRow(SLocalReducer *pLocalReducer, SSqlCmd *pCmd, tF pLocalReducer->hasUnprocessedRow = false; // copy to previous temp buffer - savePrevRecord(pLocalReducer, tmpPages); + savePreviousRow(pLocalReducer, tmpBuffer); } } @@ -1005,7 +1007,7 @@ int32_t finalizeRes(SSqlCmd *pCmd, SLocalReducer *pLocalReducer) { * results generated by simple aggregation function, we merge them all into one points * *Exception*: column projection query, required no merge procedure */ -bool needToMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpPages) { +bool needToMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpBuffer) { int32_t ret = 0; // merge all result by default int16_t functionId = tscSqlExprGet(pCmd, 0)->sqlFuncId; @@ -1016,9 +1018,9 @@ bool needToMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpPage if (pDesc->orderIdx.numOfOrderedCols > 0) { if (pDesc->tsOrder == TSQL_SO_ASC) { // asc // todo refactor comparator - ret = compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpPages->data); + ret = compare_a(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data); } else { // desc - ret = compare_d(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpPages->data); + ret = compare_d(pLocalReducer->pDesc, 1, 0, pLocalReducer->prevRowOfInput, 1, 0, tmpBuffer->data); } } } @@ -1027,23 +1029,55 @@ bool needToMerge(SSqlCmd *pCmd, SLocalReducer *pLocalReducer, tFilePage *tmpPage return (ret == 0); } -void savePreGroupNumOfRes(SSqlRes *pRes) { - // pRes->numOfGroups += 1; - // pRes->pGroupRec = realloc(pRes->pGroupRec, - // pRes->numOfGroups*sizeof(SResRec)); - // +static bool reachGroupResultLimit(SSqlCmd *pCmd, SSqlRes *pRes) { + return (pRes->numOfGroups >= pCmd->glimit.limit && pCmd->glimit.limit >= 0); +} + +static bool saveGroupResultInfo(SSqlObj *pSql) { + SSqlCmd *pCmd = &pSql->cmd; + SSqlRes *pRes = &pSql->res; + + pRes->numOfGroups += 1; + + // the output group is limited by the glimit clause + if (reachGroupResultLimit(pCmd, pRes)) { + return true; + } + + // pRes->pGroupRec = realloc(pRes->pGroupRec, pRes->numOfGroups*sizeof(SResRec)); // pRes->pGroupRec[pRes->numOfGroups-1].numOfRows = pRes->numOfRows; // pRes->pGroupRec[pRes->numOfGroups-1].numOfTotal = pRes->numOfTotal; + + return false; } -void doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, - bool doneOuput) { // there are merged results in buffer, flush to client +/** + * + * @param pSql + * @param pLocalReducer + * @param noMoreCurrentGroupRes + * @return if current group is skipped, return false, and do NOT record it into pRes->numOfGroups + */ +bool doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, bool noMoreCurrentGroupRes) { SSqlCmd * pCmd = &pSql->cmd; SSqlRes * pRes = &pSql->res; tFilePage *pResBuf = pLocalReducer->pResultBuf; tColModel *pModel = pLocalReducer->resColModel; - tColModelCompress(pModel, pResBuf, pModel->maxCapacity); + pRes->code = TSDB_CODE_SUCCESS; + + /* + * ignore the output of the current group since this group is skipped by user + * We set the numOfRows to be 0 and discard the possible remain results. + */ + if (pCmd->glimit.offset > 0) { + pRes->numOfRows = 0; + pCmd->glimit.offset -= 1; + pLocalReducer->discard = !noMoreCurrentGroupRes; + return false; + } + + tColModelCompact(pModel, pResBuf, pModel->maxCapacity); memcpy(pLocalReducer->pBufForInterpo, pResBuf->data, pLocalReducer->nResultBufSize); #ifdef _DEBUG_VIEW @@ -1061,9 +1095,9 @@ void doGenerateFinalResults(SSqlObj *pSql, SLocalReducer *pLocalReducer, } taosInterpoSetStartInfo(&pLocalReducer->interpolationInfo, pResBuf->numOfElems, pCmd->interpoType); - doInterpolateResult(pSql, pLocalReducer, doneOuput); + doInterpolateResult(pSql, pLocalReducer, noMoreCurrentGroupRes); - pRes->code = TSDB_CODE_SUCCESS; + return true; } void resetOutputBuf(SSqlCmd *pCmd, SLocalReducer *pLocalReducer) { // reset output buffer to the beginning @@ -1075,10 +1109,8 @@ void resetOutputBuf(SSqlCmd *pCmd, SLocalReducer *pLocalReducer) { // reset out memset(pLocalReducer->pResultBuf, 0, pLocalReducer->nResultBufSize + sizeof(tFilePage)); } -static void setUpForNewGroupRes(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer *pLocalReducer) { - /* - * In handling data in other groups, we need to reset the interpolation information for a new group data - */ +static void resetEnvForNewResultset(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer *pLocalReducer) { + //In handling data in other groups, we need to reset the interpolation information for a new group data pRes->numOfRows = 0; pRes->numOfTotal = 0; pCmd->limit.offset = pLocalReducer->offset; @@ -1093,41 +1125,49 @@ static void setUpForNewGroupRes(SSqlRes *pRes, SSqlCmd *pCmd, SLocalReducer *pLo } } -int32_t tscLocalDoReduce(SSqlObj *pSql) { +static bool isAllSourcesCompleted(SLocalReducer *pLocalReducer) { + return (pLocalReducer->numOfBuffer == pLocalReducer->numOfCompleted); +} + +static bool doInterpolationForCurrentGroup(SSqlObj *pSql) { SSqlCmd *pCmd = &pSql->cmd; SSqlRes *pRes = &pSql->res; - if (pSql->signature != pSql || pRes == NULL || pRes->pLocalReducer == NULL) { // all data has been processed - tscTrace("%s call the drop local reducer", __FUNCTION__); + SLocalReducer * pLocalReducer = pRes->pLocalReducer; + SInterpolationInfo *pInterpoInfo = &pLocalReducer->interpolationInfo; - tscDestroyLocalReducer(pSql); - pRes->numOfRows = 0; - pRes->row = 0; - return 0; - } + if (taosHasRemainsDataForInterpolation(pInterpoInfo)) { + assert(pCmd->interpoType != TSDB_INTERPO_NONE); - pRes->row = 0; - pRes->numOfRows = 0; + tFilePage *pFinalDataBuf = pLocalReducer->pResultBuf; + int64_t etime = *(int64_t *)(pFinalDataBuf->data + TSDB_KEYSIZE * (pInterpoInfo->numOfRawDataInRows - 1)); - SLocalReducer *pLocalReducer = pRes->pLocalReducer; + int32_t remain = taosNumOfRemainPoints(pInterpoInfo); + TSKEY ekey = taosGetRevisedEndKey(etime, pCmd->order.order, pCmd->nAggTimeInterval, pCmd->intervalTimeUnit); + int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY *)pLocalReducer->pBufForInterpo, remain, + pCmd->nAggTimeInterval, ekey, pLocalReducer->resColModel->maxCapacity); + if (rows > 0) { // do interpo + doInterpolateResult(pSql, pLocalReducer, false); + } - // set the local reduce in progress - int32_t prevStatus = - __sync_val_compare_and_swap_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY, TSC_LOCALREDUCE_IN_PROGRESS); - if (prevStatus != TSC_LOCALREDUCE_READY || pLocalReducer == NULL) { - assert(prevStatus == TSC_LOCALREDUCE_TOBE_FREED); - /* it is in tscDestroyLocalReducer function already */ - return 0; + return true; + } else { + return false; } +} +static bool doHandleLastRemainData(SSqlObj *pSql) { + SSqlCmd *pCmd = &pSql->cmd; + SSqlRes *pRes = &pSql->res; + + SLocalReducer * pLocalReducer = pRes->pLocalReducer; SInterpolationInfo *pInterpoInfo = &pLocalReducer->interpolationInfo; - tFilePage * tmpPages = pLocalReducer->pTempBuffer; - bool prevGroupDone = (!pLocalReducer->discard) && pLocalReducer->hasUnprocessedRow; + bool prevGroupCompleted = (!pLocalReducer->discard) && pLocalReducer->hasUnprocessedRow; - if ((pLocalReducer->numOfBuffer == pLocalReducer->numOfCompleted && !pLocalReducer->hasPrevRow) || - pLocalReducer->pLocalDataSrc[0] == NULL || prevGroupDone) { - /* if interpoType == TSDB_INTERPO_NONE, return directly */ + if ((isAllSourcesCompleted(pLocalReducer) && !pLocalReducer->hasPrevRow) || pLocalReducer->pLocalDataSrc[0] == NULL || + prevGroupCompleted) { + // if interpoType == TSDB_INTERPO_NONE, return directly if (pCmd->interpoType != TSDB_INTERPO_NONE) { int64_t etime = (pCmd->stime < pCmd->etime) ? pCmd->etime : pCmd->stime; @@ -1139,54 +1179,117 @@ int32_t tscLocalDoReduce(SSqlObj *pSql) { } } - /* numOfRows == 0, means no interpolation results are generated yet */ - if (pRes->numOfRows == 0) { - /* local reduce is completed */ - if ((pLocalReducer->numOfBuffer == pLocalReducer->numOfCompleted) && (!pLocalReducer->hasUnprocessedRow)) { - pLocalReducer->status = TSC_LOCALREDUCE_READY; - // set the flag, taos_free_result can release this result. - return 0; - } else { - /* start for process result for a new group */ - savePreGroupNumOfRes(pRes); - setUpForNewGroupRes(pRes, pCmd, pLocalReducer); - } - } else { - pLocalReducer->status = TSC_LOCALREDUCE_READY; - // set the flag, taos_free_result can release this result. - return 0; + /* + * 1. numOfRows == 0, means no interpolation results are generated. + * 2. if all local data sources are consumed, and no un-processed rows exist. + * + * No results will be generated and query completed. + */ + if (pRes->numOfRows > 0 || (isAllSourcesCompleted(pLocalReducer) && (!pLocalReducer->hasUnprocessedRow))) { + return true; } + + // start to process result for a new group and save the result info of previous group + if (saveGroupResultInfo(pSql)) { + return true; + } + + resetEnvForNewResultset(pRes, pCmd, pLocalReducer); } - if (taosHasNoneInterpoPoints(pInterpoInfo)) { - assert(pCmd->interpoType != TSDB_INTERPO_NONE); + return false; +} - tFilePage *pFinalDataPage = pLocalReducer->pResultBuf; - int64_t etime = *(int64_t *)(pFinalDataPage->data + TSDB_KEYSIZE * (pInterpoInfo->numOfRawDataInRows - 1)); +static void doMergeWithPrevRows(SSqlObj *pSql, int32_t numOfRes) { + SSqlCmd * pCmd = &pSql->cmd; + SSqlRes * pRes = &pSql->res; + SLocalReducer *pLocalReducer = pRes->pLocalReducer; - int32_t remain = taosNumOfRemainPoints(pInterpoInfo); - TSKEY ekey = taosGetRevisedEndKey(etime, pCmd->order.order, pCmd->nAggTimeInterval, pCmd->intervalTimeUnit); - int32_t rows = taosGetNumOfResultWithInterpo(pInterpoInfo, (TSKEY *)pLocalReducer->pBufForInterpo, remain, - pCmd->nAggTimeInterval, ekey, pLocalReducer->resColModel->maxCapacity); - if (rows > 0) { // do interpo - doInterpolateResult(pSql, pLocalReducer, false); + for (int32_t k = 0; k < pCmd->fieldsInfo.numOfOutputCols; ++k) { + SSqlExpr *pExpr = tscSqlExprGet(pCmd, k); + + pLocalReducer->pCtx[k].aOutputBuf += pLocalReducer->pCtx[k].outputBytes * numOfRes; + + // set the correct output timestamp column position + if (pExpr->sqlFuncId == TSDB_FUNC_TOP_DST || pExpr->sqlFuncId == TSDB_FUNC_BOTTOM_DST) { + pLocalReducer->pCtx[k].ptsOutputBuf = ((char *)pLocalReducer->pCtx[k].ptsOutputBuf + TSDB_KEYSIZE * numOfRes); } - pLocalReducer->status = TSC_LOCALREDUCE_READY; - // set the flag, taos_free_result can release this result. + /* set the parameters for the SQLFunctionCtx */ + tVariantAssign(&pLocalReducer->pCtx[k].param[0], &pExpr->param[0]); + + aAggs[pExpr->sqlFuncId].init(&pLocalReducer->pCtx[k]); + pLocalReducer->pCtx[k].currentStage = SECONDARY_STAGE_MERGE; + aAggs[pExpr->sqlFuncId].distSecondaryMergeFunc(&pLocalReducer->pCtx[k]); + } +} + +static void doExecuteSecondaryMerge(SSqlObj *pSql) { + SSqlCmd * pCmd = &pSql->cmd; + SSqlRes * pRes = &pSql->res; + SLocalReducer *pLocalReducer = pRes->pLocalReducer; + + for (int32_t j = 0; j < pCmd->fieldsInfo.numOfOutputCols; ++j) { + SSqlExpr *pExpr = tscSqlExprGet(pCmd, j); + + tVariantAssign(&pLocalReducer->pCtx[j].param[0], &pExpr->param[0]); + pLocalReducer->pCtx[j].numOfIteratedElems = 0; + pLocalReducer->pCtx[j].currentStage = 0; + + aAggs[pExpr->sqlFuncId].init(&pLocalReducer->pCtx[j]); + pLocalReducer->pCtx[j].currentStage = SECONDARY_STAGE_MERGE; + + aAggs[pExpr->sqlFuncId].distSecondaryMergeFunc(&pLocalReducer->pCtx[j]); + } +} + +int32_t tscLocalDoReduce(SSqlObj *pSql) { + SSqlCmd *pCmd = &pSql->cmd; + SSqlRes *pRes = &pSql->res; + + if (pSql->signature != pSql || pRes == NULL || pRes->pLocalReducer == NULL) { // all data has been processed + tscTrace("%s call the drop local reducer", __FUNCTION__); + + tscDestroyLocalReducer(pSql); + pRes->numOfRows = 0; + pRes->row = 0; return 0; } + pRes->row = 0; + pRes->numOfRows = 0; + + SLocalReducer *pLocalReducer = pRes->pLocalReducer; + + // set the data merge in progress + int32_t prevStatus = + __sync_val_compare_and_swap_32(&pLocalReducer->status, TSC_LOCALREDUCE_READY, TSC_LOCALREDUCE_IN_PROGRESS); + if (prevStatus != TSC_LOCALREDUCE_READY || pLocalReducer == NULL) { + assert(prevStatus == TSC_LOCALREDUCE_TOBE_FREED); + /* it is in tscDestroyLocalReducer function already */ + return TSDB_CODE_SUCCESS; + } + + tFilePage *tmpBuffer = pLocalReducer->pTempBuffer; + + if (doHandleLastRemainData(pSql)) { + pLocalReducer->status = TSC_LOCALREDUCE_READY; // set the flag, taos_free_result can release this result. + return TSDB_CODE_SUCCESS; + } + + if (doInterpolationForCurrentGroup(pSql)) { + pLocalReducer->status = TSC_LOCALREDUCE_READY; // set the flag, taos_free_result can release this result. + return TSDB_CODE_SUCCESS; + } + SLoserTreeInfo *pTree = pLocalReducer->pLoserTree; // clear buffer - handleUnprocessedRow(pLocalReducer, pCmd, tmpPages); + handleUnprocessedRow(pLocalReducer, pCmd, tmpBuffer); tColModel *pModel = pLocalReducer->pDesc->pSchema; while (1) { - _reduce_retrieve: - if (pLocalReducer->numOfBuffer == pLocalReducer->numOfCompleted) { - pRes->numOfRows = 0; + if (isAllSourcesCompleted(pLocalReducer)) { break; } @@ -1194,12 +1297,12 @@ int32_t tscLocalDoReduce(SSqlObj *pSql) { printf("chosen data in pTree[0] = %d\n", pTree->pNode[0].index); #endif assert((pTree->pNode[0].index < pLocalReducer->numOfBuffer) && (pTree->pNode[0].index >= 0) && - tmpPages->numOfElems == 0); + tmpBuffer->numOfElems == 0); // chosen from loser tree - SLocalDataSrc *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index]; + SLocalDataSource *pOneDataSrc = pLocalReducer->pLocalDataSrc[pTree->pNode[0].index]; - tColModelAppend(pModel, tmpPages, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1, + tColModelAppend(pModel, tmpBuffer, pOneDataSrc->filePage.data, pOneDataSrc->rowIdx, 1, pOneDataSrc->pMemBuffer->pColModel->maxCapacity); #if defined(_DEBUG_VIEW) @@ -1207,35 +1310,42 @@ int32_t tscLocalDoReduce(SSqlObj *pSql) { SSrcColumnInfo colInfo[256] = {0}; tscGetSrcColumnInfo(colInfo, pCmd); - tColModelDisplayEx(pModel, tmpPages->data, tmpPages->numOfElems, pModel->maxCapacity, colInfo); + tColModelDisplayEx(pModel, tmpBuffer->data, tmpBuffer->numOfElems, pModel->maxCapacity, colInfo); #endif + if (pLocalReducer->discard) { assert(pLocalReducer->hasUnprocessedRow == false); /* current record belongs to the same group of previous record, need to discard it */ - if (isSameGroupOfPrev(pCmd, pLocalReducer, pLocalReducer->discardData->data, tmpPages)) { - tmpPages->numOfElems = 0; + if (isSameGroup(pCmd, pLocalReducer, pLocalReducer->discardData->data, tmpBuffer)) { + tmpBuffer->numOfElems = 0; pOneDataSrc->rowIdx += 1; - loadDataIntoMemAndAdjustLoserTree(pLocalReducer, pOneDataSrc, pTree); - /* all inputs are exhausted, abort current process */ - if (pLocalReducer->numOfBuffer == pLocalReducer->numOfCompleted) { + adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree); + + // all inputs are exhausted, abort current process + if (isAllSourcesCompleted(pLocalReducer)) { break; } - /* since it belongs to the same group, ignore following records */ + // data belongs to the same group needs to be discarded continue; } else { pLocalReducer->discard = false; pLocalReducer->discardData->numOfElems = 0; - savePreGroupNumOfRes(pRes); - setUpForNewGroupRes(pRes, pCmd, pLocalReducer); + if (saveGroupResultInfo(pSql)) { + pLocalReducer->status = TSC_LOCALREDUCE_READY; + return TSDB_CODE_SUCCESS; + } + + resetEnvForNewResultset(pRes, pCmd, pLocalReducer); } } if (pLocalReducer->hasPrevRow) { - if (needToMerge(pCmd, pLocalReducer, tmpPages)) { // belong to the group of the previous row + if (needToMerge(pCmd, pLocalReducer, tmpBuffer)) { + // belong to the group of the previous row, continue process it for (int32_t j = 0; j < pCmd->fieldsInfo.numOfOutputCols; ++j) { SSqlExpr *pExpr = tscSqlExprGet(pCmd, j); tVariantAssign(&pLocalReducer->pCtx[j].param[0], &pExpr->param[0]); @@ -1244,109 +1354,86 @@ int32_t tscLocalDoReduce(SSqlObj *pSql) { } // copy to buffer - savePrevRecord(pLocalReducer, tmpPages); - } else { // reduce the previous is completed, start a new one + savePreviousRow(pLocalReducer, tmpBuffer); + } else { + /* + * current row does not belong to the group of previous row. + * so the processing of previous group is completed. + */ int32_t numOfRes = finalizeRes(pCmd, pLocalReducer); - bool sameGroup = isSameGroupOfPrev(pCmd, pLocalReducer, pLocalReducer->prevRowOfInput, tmpPages); + bool sameGroup = isSameGroup(pCmd, pLocalReducer, pLocalReducer->prevRowOfInput, tmpBuffer); tFilePage *pResBuf = pLocalReducer->pResultBuf; /* - * if the previous group does NOTE generate any result - * (pResBuf->numOfElems == 0), + * if the previous group does NOT generate any result (pResBuf->numOfElems == 0), * continue to process results instead of return results. */ if ((!sameGroup && pResBuf->numOfElems > 0) || (pResBuf->numOfElems == pLocalReducer->resColModel->maxCapacity)) { // does not belong to the same group - assert(pResBuf->numOfElems > 0); - - doGenerateFinalResults(pSql, pLocalReducer, !sameGroup); + bool notSkipped = doGenerateFinalResults(pSql, pLocalReducer, !sameGroup); + // this row needs to discard, since it belongs to the group of previous if (pLocalReducer->discard && sameGroup) { - /* this row needs to discard, since it belongs to the group of previous */ pLocalReducer->hasUnprocessedRow = false; - tmpPages->numOfElems = 0; + tmpBuffer->numOfElems = 0; } else { + // current row does not belongs to the previous group, so it is not be handled yet. pLocalReducer->hasUnprocessedRow = true; } resetOutputBuf(pCmd, pLocalReducer); pOneDataSrc->rowIdx += 1; - /* here we do not check the return value */ - loadDataIntoMemAndAdjustLoserTree(pLocalReducer, pOneDataSrc, pTree); + // here we do not check the return value + adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree); assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS); if (pRes->numOfRows == 0) { - handleUnprocessedRow(pLocalReducer, pCmd, tmpPages); + handleUnprocessedRow(pLocalReducer, pCmd, tmpBuffer); if (!sameGroup) { - /* previous group is done, we start a new one by continuing to - * retrieve data */ - savePreGroupNumOfRes(pRes); - setUpForNewGroupRes(pRes, pCmd, pLocalReducer); + /* + * previous group is done, prepare for the next group + * If previous group is not skipped, keep it in pRes->numOfGroups + */ + if (notSkipped && saveGroupResultInfo(pSql)) { + pLocalReducer->status = TSC_LOCALREDUCE_READY; + return TSDB_CODE_SUCCESS; + } + + resetEnvForNewResultset(pRes, pCmd, pLocalReducer); } - - goto _reduce_retrieve; } else { /* * if next record belongs to a new group, we do not handle this record here. * We start the process in a new round. */ if (sameGroup) { - handleUnprocessedRow(pLocalReducer, pCmd, tmpPages); + handleUnprocessedRow(pLocalReducer, pCmd, tmpBuffer); } } - pLocalReducer->status = TSC_LOCALREDUCE_READY; - // set the flag, taos_free_result can release this result. - return 0; - } else { // result buffer is not full - for (int32_t k = 0; k < pCmd->fieldsInfo.numOfOutputCols; ++k) { - SSqlExpr *pExpr = tscSqlExprGet(pCmd, k); - - pLocalReducer->pCtx[k].aOutputBuf += pLocalReducer->pCtx[k].outputBytes * numOfRes; - if (pExpr->sqlFuncId == TSDB_FUNC_TOP_DST || pExpr->sqlFuncId == TSDB_FUNC_BOTTOM_DST) { - pLocalReducer->pCtx[k].ptsOutputBuf = - ((char *)pLocalReducer->pCtx[k].ptsOutputBuf + TSDB_KEYSIZE * numOfRes); - } - - /* set the parameters for the SQLFunctionCtx */ - tVariantAssign(&pLocalReducer->pCtx[k].param[0], &pExpr->param[0]); - - aAggs[pExpr->sqlFuncId].init(&pLocalReducer->pCtx[k]); - pLocalReducer->pCtx[k].currentStage = SECONDARY_STAGE_MERGE; - aAggs[pExpr->sqlFuncId].distSecondaryMergeFunc(&pLocalReducer->pCtx[k]); + // current group has no result, + if (pRes->numOfRows == 0) { + continue; + } else { + pLocalReducer->status = TSC_LOCALREDUCE_READY; // set the flag, taos_free_result can release this result. + return TSDB_CODE_SUCCESS; } - - savePrevRecord(pLocalReducer, tmpPages); + } else { // result buffer is not full + doMergeWithPrevRows(pSql, numOfRes); + savePreviousRow(pLocalReducer, tmpBuffer); } } - } else { // put to previous input row for comparision - for (int32_t j = 0; j < pCmd->fieldsInfo.numOfOutputCols; ++j) { - SSqlExpr *pExpr = tscSqlExprGet(pCmd, j); - - tVariantAssign(&pLocalReducer->pCtx[j].param[0], &pExpr->param[0]); - pLocalReducer->pCtx[j].numOfIteratedElems = 0; - pLocalReducer->pCtx[j].currentStage = 0; - - aAggs[pExpr->sqlFuncId].init(&pLocalReducer->pCtx[j]); - pLocalReducer->pCtx[j].currentStage = SECONDARY_STAGE_MERGE; - - aAggs[pExpr->sqlFuncId].distSecondaryMergeFunc(&pLocalReducer->pCtx[j]); - } - - // copy to buffer - savePrevRecord(pLocalReducer, tmpPages); + } else { + doExecuteSecondaryMerge(pSql); + savePreviousRow(pLocalReducer, tmpBuffer); // copy the processed row to buffer } pOneDataSrc->rowIdx += 1; - - loadDataIntoMemAndAdjustLoserTree(pLocalReducer, pOneDataSrc, pTree); - if (pLocalReducer->numOfCompleted == pLocalReducer->numOfBuffer) { - break; - } + adjustLoserTreeFromNewData(pLocalReducer, pOneDataSrc, pTree); } if (pLocalReducer->hasPrevRow) { @@ -1358,8 +1445,7 @@ int32_t tscLocalDoReduce(SSqlObj *pSql) { } assert(pLocalReducer->status == TSC_LOCALREDUCE_IN_PROGRESS && pRes->row == 0); - pLocalReducer->status = TSC_LOCALREDUCE_READY; - // set the flag, taos_free_result can release this result. + pLocalReducer->status = TSC_LOCALREDUCE_READY; // set the flag, taos_free_result can release this result. return TSDB_CODE_SUCCESS; } @@ -1378,7 +1464,8 @@ void tscInitResObjForLocalQuery(SSqlObj *pObj, int32_t numOfRes, int32_t rowLen) pRes->pLocalReducer = (SLocalReducer *)calloc(1, sizeof(SLocalReducer)); /* - * we need one additional byte space the sprintf function needs one additional space to put '\0' at the end of string + * we need one additional byte space + * the sprintf function needs one additional space to put '\0' at the end of string */ size_t allocSize = numOfRes * rowLen + sizeof(tFilePage) + 1; pRes->pLocalReducer->pResultBuf = (tFilePage *)calloc(1, allocSize); diff --git a/src/client/src/tscServer.c b/src/client/src/tscServer.c index ba645c53b2368a496da7816024c12a8b8199df84..f661fd853ca61e85d5cf0becdefcd85f005ceb4b 100644 --- a/src/client/src/tscServer.c +++ b/src/client/src/tscServer.c @@ -358,14 +358,17 @@ void *tscProcessMsgFromServer(char *msg, void *ahandle, void *thandle) { pRes->code = TSDB_CODE_SUCCESS; } - tscTrace("%p cmd:%d code:%d rsp len:%d", pSql, pCmd->command, pRes->code, pRes->rspLen); - /* * There is not response callback function for submit response. * The actual inserted number of points is the first number. */ if (pMsg->msgType == TSDB_MSG_TYPE_SUBMIT_RSP) { pRes->numOfRows += *(int32_t *)pRes->pRsp; + + tscTrace("%p cmd:%d code:%d, inserted rows:%d, rsp len:%d", pSql, pCmd->command, pRes->code, + *(int32_t *)pRes->pRsp, pRes->rspLen); + } else { + tscTrace("%p cmd:%d code:%d rsp len:%d", pSql, pCmd->command, pRes->code, pRes->rspLen); } } @@ -421,7 +424,7 @@ void *tscProcessMsgFromServer(char *msg, void *ahandle, void *thandle) { return ahandle; } -static SSqlObj* tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj* pOld); +static SSqlObj* tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj* prevSqlObj); static int tscLaunchMetricSubQueries(SSqlObj *pSql); int tscProcessSql(SSqlObj *pSql) { @@ -430,12 +433,6 @@ int tscProcessSql(SSqlObj *pSql) { tscTrace("%p SQL cmd:%d will be processed, name:%s", pSql, pSql->cmd.command, pSql->cmd.name); - // whether don't judge 'isInsertFromFile' ? - if (pSql->cmd.command == TSDB_SQL_INSERT && pCmd->isInsertFromFile == 1) { - // pCmd->isInsertFromFile = 0; // lihui: can not clear the flag - return 0; - } - pSql->retry = 0; if (pSql->cmd.command < TSDB_SQL_MGMT) { pSql->maxRetry = 2; @@ -595,7 +592,6 @@ int tscLaunchMetricSubQueries(SSqlObj *pSql) { SSqlObj *pNew = tscCreateSqlObjForSubquery(pSql, trs, NULL); tscTrace("%p sub:%p launch subquery.orderOfSub:%d", pSql, pNew, pNew->cmd.vnodeIdx); - tscProcessSql(pNew); } @@ -665,7 +661,6 @@ static void tscHandleSubRetrievalError(SRetrieveSupport *trsupport, SSqlObj *pSq tscError("%p sub:%p abort further retrieval due to other queries failure,orderOfSub:%d,code:%d", pPObj, pSql, idx, *trsupport->code); } else { - if (trsupport->numOfRetry++ < MAX_NUM_OF_SUBQUERY_RETRY && *(trsupport->code) == TSDB_CODE_SUCCESS) { /* * current query failed, and the retry count is less than the available count, @@ -675,11 +670,12 @@ static void tscHandleSubRetrievalError(SRetrieveSupport *trsupport, SSqlObj *pSq // clear local saved number of results trsupport->localBuffer->numOfElems = 0; + pthread_mutex_unlock(&trsupport->queryMutex); SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSqlObj, trsupport, pSql); tscTrace("%p sub:%p retrieve failed, code:%d, orderOfSub:%d, retry:%d, new SqlObj:%p", - trsupport->pParentSqlObj, pSql, numOfRows, idx, trsupport->numOfRetry, pNew); + trsupport->pParentSqlObj, pSql, numOfRows, idx, trsupport->numOfRetry, pNew); tscProcessSql(pNew); return; @@ -689,7 +685,6 @@ static void tscHandleSubRetrievalError(SRetrieveSupport *trsupport, SSqlObj *pSq tscError("%p sub:%p retrieve failed,code:%d,orderOfSub:%d failed.no more retry,set global code:%d", pPObj, pSql, numOfRows, idx, *trsupport->code); } - } if (__sync_add_and_fetch_32(trsupport->numOfFinished, 1) < trsupport->numOfVnodes) { @@ -778,7 +773,7 @@ void tscRetrieveFromVnodeCallBack(void *param, TAOS_RES *tres, int numOfRows) { tscTrace("%p sub:%p all data retrieved from ip:%u,vid:%d, numOfRows:%d, orderOfSub:%d", pPObj, pSql, pSvd->ip, pSvd->vnode, numOfRowsFromVnode, idx); - tColModelCompress(pDesc->pSchema, trsupport->localBuffer, pDesc->pSchema->maxCapacity); + tColModelCompact(pDesc->pSchema, trsupport->localBuffer, pDesc->pSchema->maxCapacity); #ifdef _DEBUG_VIEW printf("%ld rows data flushed to disk:\n", trsupport->localBuffer->numOfElems); @@ -877,7 +872,7 @@ void tscKillMetricQuery(SSqlObj *pSql) { tscTrace("%p metric query is cancelled", pSql); } -static SSqlObj* tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj* prevSqlObj) { +SSqlObj* tscCreateSqlObjForSubquery(SSqlObj *pSql, SRetrieveSupport *trsupport, SSqlObj* prevSqlObj) { SSqlCmd *pCmd = &pSql->cmd; SSqlObj *pNew = (SSqlObj *)calloc(1, sizeof(SSqlObj)); @@ -2264,8 +2259,6 @@ int tscBuildMetricMetaMsg(SSqlObj *pSql) { SSqlGroupbyExpr *pGroupby = &pCmd->groupbyExpr; - pMetaMsg->limit = htobe64(pCmd->glimit.limit); - pMetaMsg->offset = htobe64(pCmd->glimit.offset); pMetaMsg->numOfTags = htons(pCmd->numOfReqTags); pMetaMsg->numOfGroupbyCols = htons(pGroupby->numOfGroupbyCols); @@ -2750,7 +2743,6 @@ static int32_t tscDoGetMeterMeta(SSqlObj *pSql, char *meterId) { } else { pNew->fp = tscMeterMetaCallBack; pNew->param = pSql; - pNew->sqlstr = strdup(pSql->sqlstr); code = tscProcessSql(pNew); diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index ea70292435affdd4b07b5ede9ef2c2d6e35c88ec..e71fa0792bf6484dc047543c294e7ec6658ade31 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -17,6 +17,7 @@ #include #include +#include "ihash.h" #include "taosmsg.h" #include "tcache.h" #include "tkey.h" @@ -31,9 +32,10 @@ /* * the detailed information regarding metric meta key is: - * fullmetername + '.' + querycond + '.' + [tagId1, tagId2,...] + '.' + group_orderType + '.' + limit + '.' + offset + * fullmetername + '.' + querycond + '.' + [tagId1, tagId2,...] + '.' + group_orderType + * * if querycond is null, its format is: - * fullmetername + '.' + '(nil)' + '.' + [tagId1, tagId2,...] + '.' + group_orderType + '.' + limit + '.' + offset + * fullmetername + '.' + '(nil)' + '.' + [tagId1, tagId2,...] + '.' + group_orderType */ void tscGetMetricMetaCacheKey(SSqlCmd* pCmd, char* keyStr) { char* pTagCondStr = NULL; @@ -60,8 +62,7 @@ void tscGetMetricMetaCacheKey(SSqlCmd* pCmd, char* keyStr) { pTagCondStr = strdup(tsGetMetricQueryCondPos(&pCmd->tagCond)); } - int32_t keyLen = sprintf(keyStr, "%s.%s.[%s].%d.%lld.%lld", pCmd->name, pTagCondStr, tagIdBuf, - pCmd->groupbyExpr.orderType, pCmd->glimit.limit, pCmd->glimit.offset); + int32_t keyLen = sprintf(keyStr, "%s.%s.[%s].%d", pCmd->name, pTagCondStr, tagIdBuf, pCmd->groupbyExpr.orderType); free(pTagCondStr); assert(keyLen <= TSDB_MAX_TAGS_LEN); @@ -142,8 +143,7 @@ bool tscProjectionQueryOnMetric(SSqlObj* pSql) { /* * In following cases, return false for project query on metric - * 1. failed to get metermeta from server; 2. not a metric; 3. limit 0; 4. - * show query, instead of a select query + * 1. failed to get metermeta from server; 2. not a metric; 3. limit 0; 4. show query, instead of a select query */ if (pCmd->pMeterMeta == NULL || !UTIL_METER_IS_METRIC(pCmd) || pCmd->command == TSDB_SQL_RETRIEVE_EMPTY_RESULT || pCmd->exprsInfo.numOfExprs == 0) { @@ -252,7 +252,7 @@ void tscDestroyResPointerInfo(SSqlRes* pRes) { } void tscfreeSqlCmdData(SSqlCmd* pCmd) { - tscDestroyBlockArrayList(&pCmd->pDataBlocks); + pCmd->pDataBlocks = tscDestroyBlockArrayList(pCmd->pDataBlocks); tscTagCondRelease(&pCmd->tagCond); tscClearFieldInfo(pCmd); @@ -334,20 +334,22 @@ void tscFreeSqlObj(SSqlObj* pSql) { free(pSql); } -SInsertedDataBlocks* tscCreateDataBlock(int32_t size) { - SInsertedDataBlocks* dataBuf = (SInsertedDataBlocks*)calloc(1, sizeof(SInsertedDataBlocks)); - dataBuf->nAllocSize = (uint32_t) size; +STableDataBlocks* tscCreateDataBlock(int32_t size) { + STableDataBlocks* dataBuf = (STableDataBlocks*)calloc(1, sizeof(STableDataBlocks)); + dataBuf->nAllocSize = (uint32_t)size; dataBuf->pData = calloc(1, dataBuf->nAllocSize); + dataBuf->ordered = true; + dataBuf->prevTS = INT64_MIN; return dataBuf; } -void tscDestroyDataBlock(SInsertedDataBlocks** pDataBlock) { - if (*pDataBlock == NULL) { +void tscDestroyDataBlock(STableDataBlocks* pDataBlock) { + if (pDataBlock == NULL) { return; } - tfree((*pDataBlock)->pData); - tfree(*pDataBlock); + tfree(pDataBlock->pData); + tfree(pDataBlock); } SDataBlockList* tscCreateBlockArrayList() { @@ -360,29 +362,31 @@ SDataBlockList* tscCreateBlockArrayList() { return pDataBlockArrayList; } -void tscDestroyBlockArrayList(SDataBlockList** pList) { - if (*pList == NULL) { - return; +void* tscDestroyBlockArrayList(SDataBlockList* pList) { + if (pList == NULL) { + return NULL; } - for (int32_t i = 0; i < (*pList)->nSize; i++) { - tscDestroyDataBlock(&(*pList)->pData[i]); + for (int32_t i = 0; i < pList->nSize; i++) { + tscDestroyDataBlock(pList->pData[i]); } - tfree((*pList)->pData); - tfree(*pList); + tfree(pList->pData); + tfree(pList); + + return NULL; } -int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, SInsertedDataBlocks* pDataBlock) { +int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, STableDataBlocks* pDataBlock) { SSqlCmd* pCmd = &pSql->cmd; pCmd->count = pDataBlock->numOfMeters; - strcpy(pCmd->name, pDataBlock->meterId); + strncpy(pCmd->name, pDataBlock->meterId, TSDB_METER_ID_LEN); tscAllocPayloadWithSize(pCmd, pDataBlock->nAllocSize); memcpy(pCmd->payload, pDataBlock->pData, pDataBlock->nAllocSize); - /* set the message length */ + // set the message length pCmd->payloadLen = pDataBlock->nAllocSize; return tscGetMeterMeta(pSql, pCmd->name); } @@ -390,12 +394,89 @@ int32_t tscCopyDataBlockToPayload(SSqlObj* pSql, SInsertedDataBlocks* pDataBlock void tscFreeUnusedDataBlocks(SDataBlockList* pList) { /* release additional memory consumption */ for (int32_t i = 0; i < pList->nSize; ++i) { - SInsertedDataBlocks* pDataBlock = pList->pData[i]; - pDataBlock->pData = realloc(pDataBlock->pData, (size_t) pDataBlock->size); - pDataBlock->nAllocSize = (uint32_t) pDataBlock->size; + STableDataBlocks* pDataBlock = pList->pData[i]; + pDataBlock->pData = realloc(pDataBlock->pData, pDataBlock->size); + pDataBlock->nAllocSize = (uint32_t)pDataBlock->size; } } +STableDataBlocks* tscCreateDataBlockEx(size_t size, int32_t rowSize, int32_t startOffset, char* name) { + STableDataBlocks *dataBuf = tscCreateDataBlock(size); + + dataBuf->rowSize = rowSize; + dataBuf->size = startOffset; + strncpy(dataBuf->meterId, name, TSDB_METER_ID_LEN); + return dataBuf; +} + +STableDataBlocks* tscGetDataBlockFromList(void* pHashList, SDataBlockList* pDataBlockList, int64_t id, int32_t size, + int32_t startOffset, int32_t rowSize, char* tableId) { + STableDataBlocks* dataBuf = NULL; + + STableDataBlocks** t1 = (STableDataBlocks**)taosGetIntHashData(pHashList, id); + if (t1 != NULL) { + dataBuf = *t1; + } + + if (dataBuf == NULL) { + dataBuf = tscCreateDataBlockEx((size_t) size, rowSize, startOffset, tableId); + dataBuf = *(STableDataBlocks**)taosAddIntHash(pHashList, id, (char*)&dataBuf); + tscAppendDataBlock(pDataBlockList, dataBuf); + } + + return dataBuf; +} + +void tscMergeTableDataBlocks(SSqlCmd* pCmd, SDataBlockList* pTableDataBlockList) { + void* pVnodeDataBlockHashList = taosInitIntHash(8, sizeof(void*), taosHashInt); + SDataBlockList* pVnodeDataBlockList = tscCreateBlockArrayList(); + + for (int32_t i = 0; i < pTableDataBlockList->nSize; ++i) { + STableDataBlocks* pOneTableBlock = pTableDataBlockList->pData[i]; + STableDataBlocks* dataBuf = + tscGetDataBlockFromList(pVnodeDataBlockHashList, pVnodeDataBlockList, pOneTableBlock->vgid, TSDB_PAYLOAD_SIZE, + tsInsertHeadSize, 0, pOneTableBlock->meterId); + + int64_t destSize = dataBuf->size + pOneTableBlock->size; + if (dataBuf->nAllocSize < destSize) { + while (dataBuf->nAllocSize < destSize) { + dataBuf->nAllocSize = dataBuf->nAllocSize * 1.5; + } + + char* tmp = realloc(dataBuf->pData, dataBuf->nAllocSize); + if (tmp != NULL) { + dataBuf->pData = tmp; + memset(dataBuf->pData + dataBuf->size, 0, dataBuf->nAllocSize - dataBuf->size); + } else { + // to do handle error + } + } + + SShellSubmitBlock* pBlocks = (SShellSubmitBlock*)pOneTableBlock->pData; + assert(pBlocks->numOfRows * pOneTableBlock->rowSize + sizeof(SShellSubmitBlock) == pOneTableBlock->size); + + pBlocks->numOfRows = (int16_t)sortRemoveDuplicates(pOneTableBlock, pBlocks->numOfRows); + + pBlocks->sid = htonl(pBlocks->sid); + pBlocks->uid = htobe64(pBlocks->uid); + pBlocks->sversion = htonl(pBlocks->sversion); + pBlocks->numOfRows = htons(pBlocks->numOfRows); + + memcpy(dataBuf->pData + dataBuf->size, pOneTableBlock->pData, pOneTableBlock->size); + + dataBuf->size += pOneTableBlock->size; + dataBuf->numOfMeters += 1; + } + + tscDestroyBlockArrayList(pTableDataBlockList); + + // free the table data blocks; + pCmd->pDataBlocks = pVnodeDataBlockList; + + tscFreeUnusedDataBlocks(pCmd->pDataBlocks); + taosCleanUpIntHash(pVnodeDataBlockHashList); +} + void tscCloseTscObj(STscObj* pObj) { pObj->signature = NULL; SSqlObj* pSql = pObj->pSql; @@ -821,15 +902,18 @@ int32_t tscValidateName(SSQLToken* pToken) { pToken->n = strdequote(pToken->z); strtrim(pToken->z); pToken->n = (uint32_t)strlen(pToken->z); - int len = tSQLGetToken(pToken->z, &pToken->type); + + int len = tSQLGetToken(pToken->z, &pToken->type); + + // single token, validate it if (len == pToken->n){ return validateQuoteToken(pToken); - } - else { + } else { sep = strnchrNoquote(pToken->z, TS_PATH_DELIMITER[0], pToken->n); if (sep == NULL) { return TSDB_CODE_INVALID_SQL; } + return tscValidateName(pToken); } } else { @@ -965,8 +1049,7 @@ void tscSetFreeHeatBeat(STscObj* pObj) { SSqlObj* pHeatBeat = pObj->pHb; assert(pHeatBeat == pHeatBeat->signature); - pHeatBeat->cmd.type = 1; // to denote the heart-beat timer close connection - // and free all allocated resources + pHeatBeat->cmd.type = 1; // to denote the heart-beat timer close connection and free all allocated resources } bool tscShouldFreeHeatBeat(SSqlObj* pHb) { @@ -1052,7 +1135,6 @@ void tscDoQuery(SSqlObj* pSql) { if (pCmd->command > TSDB_SQL_LOCAL) { tscProcessLocalCmd(pSql); } else { - // add to sql list, so that the show queries could get the query info if (pCmd->command == TSDB_SQL_SELECT) { tscAddIntoSqlList(pSql); } @@ -1061,18 +1143,19 @@ void tscDoQuery(SSqlObj* pSql) { pSql->cmd.vnodeIdx += 1; } - if (pSql->fp == NULL) { - if (0 == pCmd->isInsertFromFile) { - tscProcessSql(pSql); - tscProcessMultiVnodesInsert(pSql); // handle the multi-vnode insertion - } else if (1 == pCmd->isInsertFromFile) { - tscProcessMultiVnodesInsertForFile(pSql); - } else { - assert(false); - } + void* fp = pSql->fp; + + if (pCmd->isInsertFromFile == 1) { + tscProcessMultiVnodesInsertForFile(pSql); } else { + // pSql may be released in this function if it is a async insertion. tscProcessSql(pSql); - } + // handle the multi-vnode insertion for sync model + if (fp == NULL) { + assert(pSql->signature == pSql); + tscProcessMultiVnodesInsert(pSql); + } + } } } diff --git a/src/inc/textbuffer.h b/src/inc/textbuffer.h index e601a0c1293079f031a26a4e5dd2db38a1975a3e..f86e14ec8b87c1fea5701b836fa1a94da43bdf7c 100644 --- a/src/inc/textbuffer.h +++ b/src/inc/textbuffer.h @@ -184,7 +184,7 @@ void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32 /* * compress data into consecutive block without hole in data */ -void tColModelCompress(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity); +void tColModelCompact(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity); void tColModelErase(tColModel *pModel, tFilePage *inputBuffer, int32_t maxCapacity, int32_t s, int32_t e); diff --git a/src/inc/tinterpolation.h b/src/inc/tinterpolation.h index 3592664804e7bce237da8f09ab75a3d511eebf4f..e8bbd66692a39145fd1dfe90e09d592897a9e493 100644 --- a/src/inc/tinterpolation.h +++ b/src/inc/tinterpolation.h @@ -69,7 +69,7 @@ int32_t taosGetNumOfResWithoutLimit(SInterpolationInfo *pInterpoInfo, int64_t *p * @param pInterpoInfo * @return */ -bool taosHasNoneInterpoPoints(SInterpolationInfo *pInterpoInfo); +bool taosHasRemainsDataForInterpolation(SInterpolationInfo *pInterpoInfo); int32_t taosNumOfRemainPoints(SInterpolationInfo *pInterpoInfo); diff --git a/src/system/inc/vnode.h b/src/system/inc/vnode.h index 9cbc7e664b89a1fb93c8d231f5f22c75710f3395..72f7d8e6a0466f9f545bcee605bb688aa70e353e 100644 --- a/src/system/inc/vnode.h +++ b/src/system/inc/vnode.h @@ -69,11 +69,12 @@ enum _sync_cmd { }; enum _meter_state { - TSDB_METER_STATE_READY, - TSDB_METER_STATE_IMPORTING, - TSDB_METER_STATE_UPDATING, - TSDB_METER_STATE_DELETING, - TSDB_METER_STATE_DELETED, + TSDB_METER_STATE_READY = 0x00, + TSDB_METER_STATE_INSERT = 0x01, + TSDB_METER_STATE_IMPORTING = 0x02, + TSDB_METER_STATE_UPDATING = 0x04, + TSDB_METER_STATE_DELETING = 0x10, + TSDB_METER_STATE_DELETED = 0x18, }; typedef struct { @@ -184,10 +185,10 @@ typedef struct _meter_obj { short sqlLen; char searchAlgorithm : 4; char compAlgorithm : 4; - char state : 5; // deleted or added, 1: added - char status : 3; // 0: ok, 1: stop stream computing + char status; // 0: ok, 1: stop stream computing char reserved[16]; + int state; int numOfQueries; char * pSql; void * pStream; @@ -499,7 +500,7 @@ int vnodeInitStore(); void vnodeCleanUpVnodes(); -void vnodeRemoveVnode(int vnode); +int vnodeRemoveVnode(int vnode); int vnodeCreateVnode(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc); diff --git a/src/system/inc/vnodeUtil.h b/src/system/inc/vnodeUtil.h index 9713b4da0dc98fb70dd5386ca2cdf52daca105aa..2f619c85aa83bffd6b30c4f2f0d6be4c4d3eda2f 100644 --- a/src/system/inc/vnodeUtil.h +++ b/src/system/inc/vnodeUtil.h @@ -75,6 +75,12 @@ int32_t vnodeIncQueryRefCount(SQueryMeterMsg *pQueryMsg, SMeterSidExtInfo **pSid void vnodeDecQueryRefCount(SQueryMeterMsg *pQueryMsg, SMeterObj **pMeterObjList, int32_t numOfInc); +int32_t vnodeTransferMeterState(SMeterObj* pMeterObj, int32_t state); +void vnodeClearMeterState(SMeterObj* pMeterObj, int32_t state); +bool vnodeIsMeterState(SMeterObj* pMeterObj, int32_t state); +void vnodeSetMeterDeleting(SMeterObj* pMeterObj); +bool vnodeIsSafeToDeleteMeter(SVnodeObj* pVnode, int32_t sid); + #ifdef __cplusplus } #endif diff --git a/src/system/src/dnodeMgmt.c b/src/system/src/dnodeMgmt.c index 0c6822159c07f6cb1e3b3052cc815770ffcbca4c..96bdeeb214285899e2e057a3b3497f9598093fc5 100644 --- a/src/system/src/dnodeMgmt.c +++ b/src/system/src/dnodeMgmt.c @@ -445,7 +445,8 @@ int vnodeProcessFreeVnodeRequest(char *pMsg) { } dTrace("vid:%d receive free vnode message", pFree->vnode); - vnodeRemoveVnode(pFree->vnode); + int32_t code = vnodeRemoveVnode(pFree->vnode); + assert(code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS); pStart = (char *)malloc(128); if (pStart == NULL) return 0; @@ -453,7 +454,7 @@ int vnodeProcessFreeVnodeRequest(char *pMsg) { *pStart = TSDB_MSG_TYPE_FREE_VNODE_RSP; pMsg = pStart + 1; - *pMsg = 0; + *pMsg = code; vnodeSendMsgToMgmt(pStart); return 0; diff --git a/src/system/src/mgmtMeter.c b/src/system/src/mgmtMeter.c index f8ac13061dfad20535a06c203665ad4cc3d4874a..bf09c90c7dfed25cc72852a7b60ee7b096ba9ac5 100644 --- a/src/system/src/mgmtMeter.c +++ b/src/system/src/mgmtMeter.c @@ -1140,54 +1140,13 @@ static void mgmtReorganizeMetersInMetricMeta(STabObj *pMetric, SMetricMetaMsg *p startPos[1] = (int32_t)pRes->num; } - /* if pInfo->limit == 0, the query will be intercepted by sdk, and wont be - * sent to mnode */ - assert(pInfo->limit == -1 || pInfo->limit > 0); - - int32_t numOfTotal = 0; - if (pInfo->offset >= numOfSubset) { - numOfTotal = 0; - } else if (numOfSubset == 1) { - // no 'groupBy' clause, all tables returned - numOfTotal = pRes->num; - } else { - /* there is a offset value of group */ - int32_t start = 0; - int32_t end = 0; - - if (pInfo->orderType == TSQL_SO_ASC) { - start = startPos[pInfo->offset]; - - if (pInfo->limit + pInfo->offset >= numOfSubset || pInfo->limit == -1) { - /* all results are required */ - end = startPos[numOfSubset]; - } else { - end = startPos[pInfo->limit + pInfo->offset]; - } - } else { - end = startPos[numOfSubset - pInfo->offset]; - - if (pInfo->limit + pInfo->offset >= numOfSubset || pInfo->limit == -1) { - start = startPos[0]; - } else { - start = startPos[numOfSubset - pInfo->limit - pInfo->offset]; - } - } - - numOfTotal = end - start; - assert(numOfTotal > 0); - - memmove(pRes->pRes, pRes->pRes + start, numOfTotal * POINTER_BYTES); - } - /* * sort the result according to vgid to ensure meters with the same vgid is * continuous in the result list */ __compar_fn_t functor = (pRes->nodeType == TAST_NODE_TYPE_METER_PTR) ? tabObjVGIDComparator : nodeVGIDComparator; - qsort(pRes->pRes, numOfTotal, POINTER_BYTES, functor); + qsort(pRes->pRes, (size_t) pRes->num, POINTER_BYTES, functor); - pRes->num = numOfTotal; free(descriptor->pTagSchema); free(descriptor); free(startPos); diff --git a/src/system/src/vnodeCache.c b/src/system/src/vnodeCache.c index 81ce27fd8348fd1079d604fa5dcae9acc8d3969d..4c166136d0b8024efaa9a2dd4bc99a90d6eb6a49 100644 --- a/src/system/src/vnodeCache.c +++ b/src/system/src/vnodeCache.c @@ -340,19 +340,33 @@ void vnodeCommitOver(SVnodeObj *pVnode) { pthread_mutex_unlock(&pPool->vmutex); } -void vnodeCancelCommit(SVnodeObj *pVnode) { +static void vnodeWaitForCommitComplete(SVnodeObj *pVnode) { SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); - if (pPool == NULL) return; - pthread_mutex_lock(&pPool->vmutex); + // wait for 100s at most + const int32_t totalCount = 1000; + int32_t count = 0; - if (pPool->commitInProcess) { - pPool->commitInProcess = 0; - pthread_cancel(pVnode->commitThread); + // all meter is marked as dropped, so the commit will abort very quickly + while(count++ < totalCount) { + int32_t commitInProcess = 0; + + pthread_mutex_lock(&pPool->vmutex); + commitInProcess = pPool->commitInProcess; + pthread_mutex_unlock(&pPool->vmutex); + + if (commitInProcess) { + dWarn("vid:%d still in commit, wait for completed", pVnode->vnode); + taosMsleep(10); + } } +} - pthread_mutex_unlock(&pPool->vmutex); +void vnodeCancelCommit(SVnodeObj *pVnode) { + SCachePool *pPool = (SCachePool *)(pVnode->pCachePool); + if (pPool == NULL) return; + vnodeWaitForCommitComplete(pVnode); taosTmrReset(vnodeProcessCommitTimer, pVnode->cfg.commitTime * 1000, pVnode, vnodeTmrCtrl, &pVnode->commitTimer); } diff --git a/src/system/src/vnodeCommit.c b/src/system/src/vnodeCommit.c index d2c6d825569bd6b3556857c2dd0942319d6aec7d..64de3bee7fe500bb94813b55203096a995348ccf 100644 --- a/src/system/src/vnodeCommit.c +++ b/src/system/src/vnodeCommit.c @@ -26,6 +26,7 @@ #include "tsdb.h" #include "vnode.h" +#include "vnodeUtil.h" typedef struct { int sversion; @@ -160,13 +161,17 @@ size_t vnodeRestoreDataFromLog(int vnode, char *fileName, uint64_t *firstV) { if (*(int *)(cont+head.contLen) != simpleCheck) break; SMeterObj *pObj = pVnode->meterList[head.sid]; if (pObj == NULL) { - dError( - "vid:%d, sid:%d not exists, ignore data in commit log, " - "contLen:%d action:%d", + dError("vid:%d, sid:%d not exists, ignore data in commit log, contLen:%d action:%d", vnode, head.sid, head.contLen, head.action); continue; } + if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DELETING)) { + dWarn("vid:%d sid:%d id:%s, meter is dropped, ignore data in commit log, contLen:%d action:%d", + vnode, head.sid, head.contLen, head.action); + continue; + } + int32_t numOfPoints = 0; (*vnodeProcessAction[head.action])(pObj, cont, head.contLen, TSDB_DATA_SOURCE_LOG, NULL, head.sversion, &numOfPoints); diff --git a/src/system/src/vnodeFile.c b/src/system/src/vnodeFile.c index 82af0cf1459044a3af6e9986ff39905a9b716200..5fa3a4ad1fa738cb0becf25b5ae170c4eeb8bd94 100644 --- a/src/system/src/vnodeFile.c +++ b/src/system/src/vnodeFile.c @@ -577,8 +577,20 @@ _again: // read compInfo for (sid = 0; sid < pCfg->maxSessions; ++sid) { + if (pVnode->meterList == NULL) { // vnode is being freed, abort + goto _over; + } + pObj = (SMeterObj *)(pVnode->meterList[sid]); - if (pObj == NULL) continue; + if (pObj == NULL) { + continue; + } + + // meter is going to be deleted, abort + if (vnodeIsMeterState(pObj, TSDB_METER_STATE_DELETING)) { + dWarn("vid:%d sid:%d is dropped, ignore this meter", vnode, sid); + continue; + } pMeter = meterInfo + sid; pHeader = ((SCompHeader *)tmem) + sid; @@ -672,8 +684,9 @@ _again: pointsReadLast = pMeter->lastBlock.numOfPoints; query.over = 0; headInfo.totalStorage -= (pointsReadLast * pObj->bytesPerPoint); + dTrace("vid:%d sid:%d id:%s, points:%d in last block will be merged to new block", - pObj->vnode, pObj->sid, pObj->meterId, pointsReadLast); + pObj->vnode, pObj->sid, pObj->meterId, pointsReadLast); } pMeter->changed = 1; @@ -717,8 +730,8 @@ _again: } dTrace("vid:%d sid:%d id:%s, %d points are committed, lastKey:%lld slot:%d pos:%d newNumOfBlocks:%d", - pObj->vnode, pObj->sid, pObj->meterId, pMeter->committedPoints, pObj->lastKeyOnFile, query.slot, query.pos, - pMeter->newNumOfBlocks); + pObj->vnode, pObj->sid, pObj->meterId, pMeter->committedPoints, pObj->lastKeyOnFile, query.slot, query.pos, + pMeter->newNumOfBlocks); if (pMeter->committedPoints > 0) { pMeter->commitSlot = query.slot; diff --git a/src/system/src/vnodeImport.c b/src/system/src/vnodeImport.c index f9874edd36233ff745bb0423eb795254e8950d76..ea5f382f578eda8cdff4752f37ece3ffab0352f9 100644 --- a/src/system/src/vnodeImport.c +++ b/src/system/src/vnodeImport.c @@ -24,6 +24,7 @@ #include "vnode.h" #include "vnodeMgmt.h" #include "vnodeShell.h" +#include "vnodeShell.h" #include "vnodeUtil.h" #pragma GCC diagnostic ignored "-Wpointer-sign" #pragma GCC diagnostic ignored "-Wint-conversion" @@ -281,14 +282,32 @@ void vnodeProcessImportTimer(void *param, void *tmrId) { SShellObj * pShell = pImport->pShell; pImport->retry++; - pObj->state = TSDB_METER_STATE_IMPORTING; + //slow query will block the import operation + int32_t state = vnodeTransferMeterState(pObj, TSDB_METER_STATE_IMPORTING); + if (state >= TSDB_METER_STATE_DELETING) { + dError("vid:%d sid:%d id:%s, meter is deleted, failed to import, state:%d", + pObj->vnode, pObj->sid, pObj->meterId, state); + return; + } + + int32_t num = 0; + pthread_mutex_lock(&pVnode->vmutex); + num = pObj->numOfQueries; + pthread_mutex_unlock(&pVnode->vmutex); + + //if the num == 0, it will never be increased before state is set to TSDB_METER_STATE_READY + int32_t commitInProcess = 0; pthread_mutex_lock(&pPool->vmutex); - if (pPool->commitInProcess || pObj->numOfQueries > 0) { + if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0 || state != TSDB_METER_STATE_READY) { pthread_mutex_unlock(&pPool->vmutex); - pObj->state = TSDB_METER_STATE_READY; + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); + if (pImport->retry < 1000) { - dTrace("vid:%d sid:%d id:%s, commit in process, try to import later", pObj->vnode, pObj->sid, pObj->meterId); + dTrace("vid:%d sid:%d id:%s, import failed, retry later. commit in process or queries on it, or not ready." + "commitInProcess:%d, numOfQueries:%d, state:%d", pObj->vnode, pObj->sid, pObj->meterId, + commitInProcess, num, state); + taosTmrStart(vnodeProcessImportTimer, 10, pImport, vnodeTmrCtrl); return; } else { @@ -304,7 +323,8 @@ void vnodeProcessImportTimer(void *param, void *tmrId) { } } - pObj->state = TSDB_METER_STATE_READY; + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); + pVnode->version++; // send response back to shell @@ -862,15 +882,19 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi } if (*((TSKEY *)(pSubmit->payLoad + (rows - 1) * pObj->bytesPerPoint)) > pObj->lastKey) { + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); + vnodeTransferMeterState(pObj, TSDB_METER_STATE_INSERT); code = vnodeInsertPoints(pObj, cont, contLen, TSDB_DATA_SOURCE_LOG, NULL, pObj->sversion, &pointsImported); + if (pShell) { pShell->code = code; pShell->numOfTotalPoints += pointsImported; } + + vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); } else { SImportInfo *pNew, import; - pObj->state = TSDB_METER_STATE_IMPORTING; dTrace("vid:%d sid:%d id:%s, import %d rows data", pObj->vnode, pObj->sid, pObj->meterId, rows); memset(&import, 0, sizeof(import)); import.firstKey = *((TSKEY *)(payload)); @@ -880,10 +904,19 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi import.payload = payload; import.rows = rows; + int32_t num = 0; + pthread_mutex_lock(&pVnode->vmutex); + num = pObj->numOfQueries; + pthread_mutex_unlock(&pVnode->vmutex); + + int32_t commitInProcess = 0; + pthread_mutex_lock(&pPool->vmutex); - if (pPool->commitInProcess || pObj->numOfQueries > 0) { + if (((commitInProcess = pPool->commitInProcess) == 1) || num > 0) { pthread_mutex_unlock(&pPool->vmutex); - pObj->state = TSDB_METER_STATE_READY; + + //restore meter state + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); pNew = (SImportInfo *)malloc(sizeof(SImportInfo)); memcpy(pNew, &import, sizeof(SImportInfo)); @@ -892,8 +925,9 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi pNew->payload = malloc(payloadLen); memcpy(pNew->payload, payload, payloadLen); - dTrace("vid:%d sid:%d id:%s, commit/query:%d in process, import later, ", pObj->vnode, pObj->sid, pObj->meterId, - pObj->numOfQueries); + dTrace("vid:%d sid:%d id:%s, import later, commit in process:%d, numOfQueries:%d", pObj->vnode, pObj->sid, + pObj->meterId, commitInProcess, pObj->numOfQueries); + taosTmrStart(vnodeProcessImportTimer, 10, pNew, vnodeTmrCtrl); return 0; } else { @@ -905,9 +939,10 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi pShell->numOfTotalPoints += import.importedRows; } } + + vnodeClearMeterState(pObj, TSDB_METER_STATE_IMPORTING); } - pObj->state = TSDB_METER_STATE_READY; pVnode->version++; if (pShell) { @@ -918,6 +953,7 @@ int vnodeImportPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi return 0; } +//todo abort from the procedure if the meter is going to be dropped int vnodeImportData(SMeterObj *pObj, SImportInfo *pImport) { int code = 0; diff --git a/src/system/src/vnodeMeter.c b/src/system/src/vnodeMeter.c index 04dc22f7d6614847282c772a7caa6c74340f4a38..d6283cfa4f49f6b6fde5f2f81d43dc9dac4d9408 100644 --- a/src/system/src/vnodeMeter.c +++ b/src/system/src/vnodeMeter.c @@ -47,6 +47,8 @@ void vnodeFreeMeterObj(SMeterObj *pObj) { if (vnodeList[pObj->vnode].meterList != NULL) { vnodeList[pObj->vnode].meterList[pObj->sid] = NULL; } + + memset(pObj->meterId, 0, tListLen(pObj->meterId)); tfree(pObj); } @@ -143,7 +145,7 @@ int vnodeSaveMeterObjToFile(SMeterObj *pObj) { memcpy(buffer, pObj, offsetof(SMeterObj, reserved)); memcpy(buffer + offsetof(SMeterObj, reserved), pObj->schema, pObj->numOfColumns * sizeof(SColumn)); memcpy(buffer + offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn), pObj->pSql, pObj->sqlLen); - taosCalcChecksumAppend(0, buffer, new_length); + taosCalcChecksumAppend(0, (uint8_t *)buffer, new_length); if (offset == 0 || length < new_length) { // New, append to file end fseek(fp, 0, SEEK_END); @@ -208,7 +210,7 @@ int vnodeSaveAllMeterObjToFile(int vnode) { memcpy(buffer, pObj, offsetof(SMeterObj, reserved)); memcpy(buffer + offsetof(SMeterObj, reserved), pObj->schema, pObj->numOfColumns * sizeof(SColumn)); memcpy(buffer + offsetof(SMeterObj, reserved) + pObj->numOfColumns * sizeof(SColumn), pObj->pSql, pObj->sqlLen); - taosCalcChecksumAppend(0, buffer, new_length); + taosCalcChecksumAppend(0, (uint8_t *)buffer, new_length); if (offset == 0 || length > new_length) { // New, append to file end new_offset = fseek(fp, 0, SEEK_END); @@ -391,7 +393,7 @@ int vnodeOpenMetersVnode(int vnode) { fseek(fp, offset, SEEK_SET); if (fread(buffer, length, 1, fp) <= 0) break; - if (taosCheckChecksumWhole(buffer, length)) { + if (taosCheckChecksumWhole((uint8_t *)buffer, length)) { vnodeRestoreMeterObj(buffer, length - sizeof(TSCKSUM)); } else { dError("meter object file is broken since checksum mismatch, vnode: %d sid: %d, try to recover", vnode, sid); @@ -440,7 +442,7 @@ int vnodeCreateMeterObj(SMeterObj *pNew, SConnSec *pSec) { } dTrace("vid:%d sid:%d id:%s, update schema", pNew->vnode, pNew->sid, pNew->meterId); - if (pObj->state != TSDB_METER_STATE_UPDATING) vnodeUpdateMeter(pNew, NULL); + if (!vnodeIsMeterState(pObj, TSDB_METER_STATE_UPDATING)) vnodeUpdateMeter(pNew, NULL); return TSDB_CODE_SUCCESS; } @@ -483,27 +485,20 @@ int vnodeRemoveMeterObj(int vnode, int sid) { if (vnodeList[vnode].meterList == NULL) return 0; pObj = vnodeList[vnode].meterList[sid]; - if ((pObj == NULL) || (pObj->state == TSDB_METER_STATE_DELETED)) return 0; - if (pObj->state == TSDB_METER_STATE_IMPORTING) return TSDB_CODE_ACTION_IN_PROGRESS; + if (pObj == NULL) { + return TSDB_CODE_SUCCESS; + } - int32_t retFlag = 0; - pthread_mutex_lock(&vnodeList[vnode].vmutex); - pObj->state = TSDB_METER_STATE_DELETING; - if (pObj->numOfQueries > 0) { - retFlag = TSDB_CODE_ACTION_IN_PROGRESS; - dWarn("vid:%d sid:%d id:%s %d queries executing on it, wait query to be finished", - vnode, pObj->sid, pObj->meterId, pObj->numOfQueries); + if (!vnodeIsSafeToDeleteMeter(&vnodeList[vnode], sid)) { + return TSDB_CODE_ACTION_IN_PROGRESS; } - pthread_mutex_unlock(&vnodeList[vnode].vmutex); - if (retFlag != 0) return retFlag; - // after remove this meter, change its stat to DELETED + // after remove this meter, change its state to DELETED pObj->state = TSDB_METER_STATE_DELETED; pObj->timeStamp = taosGetTimestampMs(); vnodeList[vnode].lastRemove = pObj->timeStamp; vnodeRemoveStream(pObj); - pObj->meterId[0] = 0; vnodeSaveMeterObjToFile(pObj); vnodeFreeMeterObj(pObj); @@ -578,10 +573,19 @@ int vnodeInsertPoints(SMeterObj *pObj, char *cont, int contLen, char source, voi if (pVnode->lastKeyOnFile > pVnode->cfg.daysToKeep * tsMsPerDay[pVnode->cfg.precision] + firstKey) { dError("vid:%d sid:%d id:%s, vnode lastKeyOnFile:%lld, data is too old to insert, key:%lld", pObj->vnode, pObj->sid, pObj->meterId, pVnode->lastKeyOnFile, firstKey); - return TSDB_CODE_OTHERS; + return TSDB_CODE_TIMESTAMP_OUT_OF_RANGE; } for (i = 0; i < numOfPoints; ++i) { + // meter will be dropped, abort current insertion + if (pObj->state >= TSDB_METER_STATE_DELETING) { + dWarn("vid:%d sid:%d id:%s, meter is dropped, abort insert, state:%d", pObj->vnode, pObj->sid, pObj->meterId, + pObj->state); + + code = TSDB_CODE_INVALID_SESSION_ID; + break; + } + if (*((TSKEY *)pData) <= pObj->lastKey) { dWarn("vid:%d sid:%d id:%s, received key:%ld not larger than lastKey:%ld", pObj->vnode, pObj->sid, pObj->meterId, *((TSKEY *)pData), pObj->lastKey); @@ -632,9 +636,11 @@ void vnodeProcessUpdateSchemaTimer(void *param, void *tmrId) { pthread_mutex_lock(&pPool->vmutex); if (pPool->commitInProcess) { - dTrace("vid:%d sid:%d mid:%s, commiting in process, commit later", pObj->vnode, pObj->sid, pObj->meterId); - if (taosTmrStart(vnodeProcessUpdateSchemaTimer, 10, pObj, vnodeTmrCtrl) == NULL) - pObj->state = TSDB_METER_STATE_READY; + dTrace("vid:%d sid:%d mid:%s, committing in process, commit later", pObj->vnode, pObj->sid, pObj->meterId); + if (taosTmrStart(vnodeProcessUpdateSchemaTimer, 10, pObj, vnodeTmrCtrl) == NULL) { + vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); + } + pthread_mutex_unlock(&pPool->vmutex); return; } @@ -649,41 +655,54 @@ void vnodeUpdateMeter(void *param, void *tmrId) { SMeterObj *pNew = (SMeterObj *)param; if (pNew == NULL || pNew->vnode < 0 || pNew->sid < 0) return; - if (vnodeList[pNew->vnode].meterList == NULL) { + SVnodeObj* pVnode = &vnodeList[pNew->vnode]; + + if (pVnode->meterList == NULL) { dTrace("vid:%d sid:%d id:%s, vnode is deleted, abort update schema", pNew->vnode, pNew->sid, pNew->meterId); free(pNew->schema); free(pNew); return; } - SMeterObj *pObj = vnodeList[pNew->vnode].meterList[pNew->sid]; - if (pObj == NULL) { + SMeterObj *pObj = pVnode->meterList[pNew->sid]; + if (pObj == NULL || vnodeIsMeterState(pObj, TSDB_METER_STATE_DELETING)) { dTrace("vid:%d sid:%d id:%s, meter is deleted, abort update schema", pNew->vnode, pNew->sid, pNew->meterId); free(pNew->schema); free(pNew); return; } - pObj->state = TSDB_METER_STATE_UPDATING; + int32_t state = vnodeTransferMeterState(pObj, TSDB_METER_STATE_UPDATING); + if (state >= TSDB_METER_STATE_DELETING) { + dError("vid:%d sid:%d id:%s, meter is deleted, failed to update, state:%d", + pObj->vnode, pObj->sid, pObj->meterId, state); + return; + } + + int32_t num = 0; + pthread_mutex_lock(&pVnode->vmutex); + num = pObj->numOfQueries; + pthread_mutex_unlock(&pVnode->vmutex); + + if (num > 0 || state != TSDB_METER_STATE_READY) { + dTrace("vid:%d sid:%d id:%s, update failed, retry later, numOfQueries:%d, state:%d", + pNew->vnode, pNew->sid, pNew->meterId, num, state); - if (pObj->numOfQueries > 0) { + // retry update meter in 50ms if (taosTmrStart(vnodeUpdateMeter, 50, pNew, vnodeTmrCtrl) == NULL) { - dError("vid:%d sid:%d id:%s, failed to start update timer", pNew->vnode, pNew->sid, pNew->meterId); - pObj->state = TSDB_METER_STATE_READY; + dError("vid:%d sid:%d id:%s, failed to start update timer, no retry", pNew->vnode, pNew->sid, pNew->meterId); free(pNew->schema); free(pNew); } - - dTrace("vid:%d sid:%d id:%s, there are ongoing queries, update later", pNew->vnode, pNew->sid, pNew->meterId); return; } // commit first if (!vnodeIsCacheCommitted(pObj)) { - // commit + // commit data first if (taosTmrStart(vnodeProcessUpdateSchemaTimer, 0, pObj, vnodeTmrCtrl) == NULL) { dError("vid:%d sid:%d id:%s, failed to start commit timer", pObj->vnode, pObj->sid, pObj->meterId); - pObj->state = TSDB_METER_STATE_READY; + vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); free(pNew->schema); free(pNew); return; @@ -691,13 +710,14 @@ void vnodeUpdateMeter(void *param, void *tmrId) { if (taosTmrStart(vnodeUpdateMeter, 50, pNew, vnodeTmrCtrl) == NULL) { dError("vid:%d sid:%d id:%s, failed to start update timer", pNew->vnode, pNew->sid, pNew->meterId); - pObj->state = TSDB_METER_STATE_READY; + vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); free(pNew->schema); free(pNew); } dTrace("vid:%d sid:%d meterId:%s, there are data in cache, commit first, update later", pNew->vnode, pNew->sid, pNew->meterId); + vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); return; } @@ -716,7 +736,7 @@ void vnodeUpdateMeter(void *param, void *tmrId) { pObj->sversion = pNew->sversion; vnodeSaveMeterObjToFile(pObj); - pObj->state = TSDB_METER_STATE_READY; + vnodeClearMeterState(pObj, TSDB_METER_STATE_UPDATING); dTrace("vid:%d sid:%d id:%s, schema is updated", pNew->vnode, pNew->sid, pNew->meterId); free(pNew); diff --git a/src/system/src/vnodeQueryImpl.c b/src/system/src/vnodeQueryImpl.c index 7b315e9753e25b11e4494d22ffea717f8edc08dd..c958754298f29d9bce86851b13e0957f87f78177 100644 --- a/src/system/src/vnodeQueryImpl.c +++ b/src/system/src/vnodeQueryImpl.c @@ -1730,6 +1730,17 @@ static int64_t getOldestKey(int32_t numOfFiles, int64_t fileId, SVnodeCfg *pCfg) bool isQueryKilled(SQuery *pQuery) { SQInfo *pQInfo = (SQInfo *)GET_QINFO_ADDR(pQuery); + + /* + * check if the queried meter is going to be deleted. + * if it will be deleted soon, stop current query ASAP. + */ + SMeterObj* pMeterObj = pQInfo->pObj; + if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DELETING)) { + pQInfo->killed = 1; + return true; + } + return (pQInfo->killed == 1); } diff --git a/src/system/src/vnodeShell.c b/src/system/src/vnodeShell.c index 3134a04b9aedbd9575d2a9121814ba0c53fe9ec9..bb159706fb124024e6f397cafc766743e996cdcd 100644 --- a/src/system/src/vnodeShell.c +++ b/src/system/src/vnodeShell.c @@ -15,12 +15,13 @@ #define _DEFAULT_SOURCE -#include "vnodeShell.h" #include #include #include #include #include "taosmsg.h" +#include "vnode.h" +#include "vnodeShell.h" #include "tschemautil.h" #include "textbuffer.h" @@ -28,6 +29,7 @@ #include "vnode.h" #include "vnodeRead.h" #include "vnodeUtil.h" + #pragma GCC diagnostic ignored "-Wint-conversion" void * pShellServer = NULL; @@ -87,6 +89,7 @@ void *vnodeProcessMsgFromShell(char *msg, void *ahandle, void *thandle) { dTrace("vid:%d sid:%d, msg:%s is received pConn:%p", vnode, sid, taosMsg[pMsg->msgType], thandle); + // set in query processing flag if (pMsg->msgType == TSDB_MSG_TYPE_QUERY) { vnodeProcessQueryRequest((char *)pMsg->content, pMsg->msgLen - sizeof(SIntMsg), pObj); } else if (pMsg->msgType == TSDB_MSG_TYPE_RETRIEVE) { @@ -96,7 +99,7 @@ void *vnodeProcessMsgFromShell(char *msg, void *ahandle, void *thandle) { } else { dError("%s is not processed", taosMsg[pMsg->msgType]); } - + return pObj; } @@ -157,16 +160,30 @@ int vnodeOpenShellVnode(int vnode) { return 0; } -void vnodeCloseShellVnode(int vnode) { - taosCloseRpcChann(pShellServer, vnode); +void vnodeDelayedFreeResource(void *param, void *tmrId) { + int32_t vnode = *(int32_t*) param; + taosCloseRpcChann(pShellServer, vnode); // close connection + tfree (shellList[vnode]); //free SShellObj + + tfree(param); +} +void vnodeCloseShellVnode(int vnode) { if (shellList[vnode] == NULL) return; for (int i = 0; i < vnodeList[vnode].cfg.maxSessions; ++i) { vnodeFreeQInfo(shellList[vnode][i].qhandle, true); } - tfree(shellList[vnode]); + int32_t* v = malloc(sizeof(int32_t)); + *v = vnode; + + /* + * free the connection related resource after 5sec, since the msg may be in + * the task queue, free it immediate will cause crash + */ + dTrace("vid:%d, delay 5sec to free resources", vnode); + taosTmrStart(vnodeDelayedFreeResource, 5000, v, vnodeTmrCtrl); } void vnodeCleanUpShell() { @@ -488,24 +505,38 @@ int vnodeProcessShellSubmitRequest(char *pMsg, int msgLen, SShellObj *pObj) { int subMsgLen = sizeof(pBlocks->numOfRows) + htons(pBlocks->numOfRows) * pMeterObj->bytesPerPoint; int sversion = htonl(pBlocks->sversion); - if (pMeterObj->state == TSDB_METER_STATE_READY) { - if (pSubmit->import) - code = vnodeImportPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, + int32_t state = TSDB_METER_STATE_READY; + if (pSubmit->import) { + state = vnodeTransferMeterState(pMeterObj, TSDB_METER_STATE_IMPORTING); + } else { + state = vnodeTransferMeterState(pMeterObj, TSDB_METER_STATE_INSERT); + } + + if (state == TSDB_METER_STATE_READY) { + // meter status is ready for insert/import + if (pSubmit->import) { + code = vnodeImportPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, pObj, sversion, &numOfPoints); - else - code = vnodeInsertPoints(pMeterObj, (char *)&(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL, + } else { + code = vnodeInsertPoints(pMeterObj, (char *) &(pBlocks->numOfRows), subMsgLen, TSDB_DATA_SOURCE_SHELL, NULL, sversion, &numOfPoints); - if (code != 0) break; - } else if (pMeterObj->state >= TSDB_METER_STATE_DELETING) { - dTrace("vid:%d sid:%d id:%s, is is removed, state:", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, - pMeterObj->state); - code = TSDB_CODE_NOT_ACTIVE_SESSION; - break; - } else { // importing state or others - dTrace("vid:%d sid:%d id:%s, try again since in state:%d", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, - pMeterObj->state); - code = TSDB_CODE_ACTION_IN_PROGRESS; - break; + vnodeClearMeterState(pMeterObj, TSDB_METER_STATE_INSERT); + } + + if (code != TSDB_CODE_SUCCESS) {break;} + } else { + if (vnodeIsMeterState(pMeterObj, TSDB_METER_STATE_DELETING)) { + dTrace("vid:%d sid:%d id:%s, it is removed, state:%d", pMeterObj->vnode, pMeterObj->sid, pMeterObj->meterId, + pMeterObj->state); + code = TSDB_CODE_NOT_ACTIVE_SESSION; + break; + } else {// waiting for 300ms by default and try again + dTrace("vid:%d sid:%d id:%s, try submit again since in state:%d", pMeterObj->vnode, pMeterObj->sid, + pMeterObj->meterId, pMeterObj->state); + + code = TSDB_CODE_ACTION_IN_PROGRESS; + break; + } } numOfTotalPoints += numOfPoints; diff --git a/src/system/src/vnodeStore.c b/src/system/src/vnodeStore.c index 29d4b46e8b77995262326d2585c1b817cc909b81..d596f50b6116c597672dbc4b8260cfa81559a986 100644 --- a/src/system/src/vnodeStore.c +++ b/src/system/src/vnodeStore.c @@ -85,13 +85,42 @@ int vnodeOpenVnode(int vnode) { return 0; } -void vnodeCloseVnode(int vnode) { - if (vnodeList == NULL) return; +static int32_t vnodeMarkAllMetersDropped(SVnodeObj* pVnode) { + if (pVnode->meterList == NULL) { + assert(pVnode->cfg.maxSessions == 0); + return TSDB_CODE_SUCCESS; + } + + bool ready = true; + for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { + if (!vnodeIsSafeToDeleteMeter(pVnode, sid)) { + ready = false; + } else { // set the meter is to be deleted + SMeterObj* pObj = pVnode->meterList[sid]; + if (pObj != NULL) { + pObj->state = TSDB_METER_STATE_DELETED; + } + } + } + + return ready? TSDB_CODE_SUCCESS:TSDB_CODE_ACTION_IN_PROGRESS; +} + +int vnodeCloseVnode(int vnode) { + if (vnodeList == NULL) return TSDB_CODE_SUCCESS; + + SVnodeObj* pVnode = &vnodeList[vnode]; pthread_mutex_lock(&dmutex); - if (vnodeList[vnode].cfg.maxSessions == 0) { + if (pVnode->cfg.maxSessions == 0) { pthread_mutex_unlock(&dmutex); - return; + return TSDB_CODE_SUCCESS; + } + + // set the meter is dropped flag + if (vnodeMarkAllMetersDropped(pVnode) != TSDB_CODE_SUCCESS) { + pthread_mutex_unlock(&dmutex); + return TSDB_CODE_ACTION_IN_PROGRESS; } vnodeCloseStream(vnodeList + vnode); @@ -111,6 +140,7 @@ void vnodeCloseVnode(int vnode) { vnodeCalcOpenVnodes(); pthread_mutex_unlock(&dmutex); + return TSDB_CODE_SUCCESS; } int vnodeCreateVnode(int vnode, SVnodeCfg *pCfg, SVPeerDesc *pDesc) { @@ -182,25 +212,23 @@ void vnodeRemoveDataFiles(int vnode) { dTrace("vnode %d is removed!", vnode); } -void vnodeRemoveVnode(int vnode) { - if (vnodeList == NULL) return; +int vnodeRemoveVnode(int vnode) { + if (vnodeList == NULL) return TSDB_CODE_SUCCESS; if (vnodeList[vnode].cfg.maxSessions > 0) { - vnodeCloseVnode(vnode); + int32_t ret = vnodeCloseVnode(vnode); + if (ret != TSDB_CODE_SUCCESS) { + return ret; + } vnodeRemoveDataFiles(vnode); - - // sprintf(cmd, "rm -rf %s/vnode%d", tsDirectory, vnode); - // if ( system(cmd) < 0 ) { - // dError("vid:%d, failed to run command %s vnode, reason:%s", vnode, cmd, strerror(errno)); - // } else { - // dTrace("vid:%d, this vnode is deleted!!!", vnode); - // } } else { dTrace("vid:%d, max sessions:%d, this vnode already dropped!!!", vnode, vnodeList[vnode].cfg.maxSessions); - vnodeList[vnode].cfg.maxSessions = 0; + vnodeList[vnode].cfg.maxSessions = 0; //reset value vnodeCalcOpenVnodes(); } + + return TSDB_CODE_SUCCESS; } int vnodeInitStore() { diff --git a/src/system/src/vnodeStream.c b/src/system/src/vnodeStream.c index f2a0eeccbb2c756e1b30e9c5783d160c0cee80b1..75bb972e74b44f25bf325adeb34bd27faae66960 100644 --- a/src/system/src/vnodeStream.c +++ b/src/system/src/vnodeStream.c @@ -51,8 +51,17 @@ void vnodeProcessStreamRes(void *param, TAOS_RES *tres, TAOS_ROW row) { } contLen += sizeof(SSubmitMsg); + int32_t numOfPoints = 0; - vnodeInsertPoints(pObj, (char *)pMsg, contLen, TSDB_DATA_SOURCE_SHELL, NULL, pObj->sversion, &numOfPoints); + + int32_t state = vnodeTransferMeterState(pObj, TSDB_METER_STATE_INSERT); + if (state == TSDB_METER_STATE_READY) { + vnodeInsertPoints(pObj, (char *)pMsg, contLen, TSDB_DATA_SOURCE_SHELL, NULL, pObj->sversion, &numOfPoints); + vnodeClearMeterState(pObj, TSDB_METER_STATE_INSERT); + } else { + dError("vid:%d sid:%d id:%s, failed to insert continuous query results, state:%d", pObj->vnode, pObj->sid, + pObj->meterId, state); + } assert(numOfPoints >= 0 && numOfPoints <= 1); tfree(pTemp); @@ -76,7 +85,7 @@ void vnodeOpenStreams(void *param, void *tmrId) { for (int sid = 0; sid < pVnode->cfg.maxSessions; ++sid) { pObj = pVnode->meterList[sid]; - if (pObj == NULL || pObj->sqlLen == 0 || pObj->status == 1 || pObj->state == TSDB_METER_STATE_DELETED) continue; + if (pObj == NULL || pObj->sqlLen == 0 || vnodeIsMeterState(pObj, TSDB_METER_STATE_DELETING)) continue; dTrace("vid:%d sid:%d id:%s, open stream:%s", pObj->vnode, sid, pObj->meterId, pObj->pSql); diff --git a/src/system/src/vnodeUtil.c b/src/system/src/vnodeUtil.c index 51fe9fc3bd1ff7725bd150aa9f77daac13ff1e66..f97b7cb00bc4809e73f02fcdeca7dec990f13607 100644 --- a/src/system/src/vnodeUtil.c +++ b/src/system/src/vnodeUtil.c @@ -361,6 +361,7 @@ void vnodeUpdateFilterColumnIndex(SQuery* pQuery) { // TODO support k<12 and k<>9 int32_t vnodeCreateFilterInfo(void* pQInfo, SQuery* pQuery) { + for (int32_t i = 0; i < pQuery->numOfCols; ++i) { if (pQuery->colList[i].data.filterOn > 0) { pQuery->numOfFilterCols++; @@ -401,8 +402,6 @@ int32_t vnodeCreateFilterInfo(void* pQInfo, SQuery* pQuery) { pFilterInfo->fp = rangeFilterArray[2]; } } else { - assert(lower == TSDB_RELATION_LARGE); - if (upper == TSDB_RELATION_LESS_EQUAL) { pFilterInfo->fp = rangeFilterArray[3]; } else { @@ -421,6 +420,7 @@ int32_t vnodeCreateFilterInfo(void* pQInfo, SQuery* pQuery) { pFilterInfo->fp = filterArray[upper]; } } + pFilterInfo->elemSize = bytes; j++; } @@ -470,6 +470,18 @@ bool vnodeIsProjectionQuery(SSqlFunctionExpr* pExpr, int32_t numOfOutput) { return true; } +/* + * the pMeter->state may be changed by vnodeIsSafeToDeleteMeter and import/update processor, the check of + * the state will not always be correct. + * + * The import/update/deleting is actually blocked by current query processing if the check of meter state is + * passed, but later queries are denied. + * + * 1. vnodeIsSafeToDelete will wait for this complete, since it also use the vmutex to check the numOfQueries + * 2. import will check the numOfQueries again after setting state to be TSDB_METER_STATE_IMPORTING, while the + * vmutex is also used. + * 3. insert has nothing to do with the query processing. + */ int32_t vnodeIncQueryRefCount(SQueryMeterMsg* pQueryMsg, SMeterSidExtInfo** pSids, SMeterObj** pMeterObjList, int32_t* numOfInc) { SVnodeObj* pVnode = &vnodeList[pQueryMsg->vnode]; @@ -477,21 +489,24 @@ int32_t vnodeIncQueryRefCount(SQueryMeterMsg* pQueryMsg, SMeterSidExtInfo** pSid int32_t num = 0; int32_t code = TSDB_CODE_SUCCESS; - // check all meter metadata to ensure all metadata are identical. for (int32_t i = 0; i < pQueryMsg->numOfSids; ++i) { SMeterObj* pMeter = pVnode->meterList[pSids[i]->sid]; - if (pMeter == NULL || pMeter->state != TSDB_METER_STATE_READY) { - if (pMeter == NULL) { + if (pMeter == NULL || (pMeter->state > TSDB_METER_STATE_INSERT)) { + if (pMeter == NULL || vnodeIsMeterState(pMeter, TSDB_METER_STATE_DELETING)) { code = TSDB_CODE_NOT_ACTIVE_SESSION; - dError("qmsg:%p, vid:%d sid:%d, not there", pQueryMsg, pQueryMsg->vnode, pSids[i]->sid); + dError("qmsg:%p, vid:%d sid:%d, not there or will be dropped", pQueryMsg, pQueryMsg->vnode, pSids[i]->sid); vnodeSendMeterCfgMsg(pQueryMsg->vnode, pSids[i]->sid); - } else { + } else {//update or import code = TSDB_CODE_ACTION_IN_PROGRESS; dTrace("qmsg:%p, vid:%d sid:%d id:%s, it is in state:%d, wait!", pQueryMsg, pQueryMsg->vnode, pSids[i]->sid, pMeter->meterId, pMeter->state); } } else { + /* + * vnodeIsSafeToDeleteMeter will wait for this function complete, and then it can + * check if the numOfQueries is 0 or not. + */ pMeterObjList[(*numOfInc)++] = pMeter; __sync_fetch_and_add(&pMeter->numOfQueries, 1); @@ -517,7 +532,6 @@ void vnodeDecQueryRefCount(SQueryMeterMsg* pQueryMsg, SMeterObj** pMeterObjList, SMeterObj* pMeter = pMeterObjList[i]; if (pMeter != NULL) { // here, do not need to lock to perform operations - assert(pMeter->state != TSDB_METER_STATE_DELETING && pMeter->state != TSDB_METER_STATE_DELETED); __sync_fetch_and_sub(&pMeter->numOfQueries, 1); if (pMeter->numOfQueries > 0) { @@ -571,3 +585,66 @@ void vnodeUpdateQueryColumnIndex(SQuery* pQuery, SMeterObj* pMeterObj) { } } } + +int32_t vnodeTransferMeterState(SMeterObj* pMeterObj, int32_t state) { + return __sync_val_compare_and_swap(&pMeterObj->state, TSDB_METER_STATE_READY, state); +} + +void vnodeClearMeterState(SMeterObj* pMeterObj, int32_t state) { + pMeterObj->state &= (~state); +} + +bool vnodeIsMeterState(SMeterObj* pMeterObj, int32_t state) { + if (state == TSDB_METER_STATE_READY) { + return pMeterObj->state == TSDB_METER_STATE_READY; + } else if (state == TSDB_METER_STATE_DELETING) { + return pMeterObj->state >= state; + } else { + return (((pMeterObj->state) & state) == state); + } +} + +void vnodeSetMeterDeleting(SMeterObj* pMeterObj) { + if (pMeterObj == NULL) { + return; + } + + pMeterObj->state |= TSDB_METER_STATE_DELETING; +} + +bool vnodeIsSafeToDeleteMeter(SVnodeObj* pVnode, int32_t sid) { + SMeterObj* pObj = pVnode->meterList[sid]; + + if (pObj == NULL || vnodeIsMeterState(pObj, TSDB_METER_STATE_DELETED)) { + return true; + } + + int32_t prev = vnodeTransferMeterState(pObj, TSDB_METER_STATE_DELETING); + + /* + * if the meter is not in ready/deleting state, it must be in insert/import/update, + * set the deleting state and wait the procedure to be completed + */ + if (prev != TSDB_METER_STATE_READY && prev < TSDB_METER_STATE_DELETING) { + vnodeSetMeterDeleting(pObj); + + dWarn("vid:%d sid:%d id:%s, can not be deleted, state:%d, wait", pObj->vnode, pObj->sid, pObj->meterId, prev); + return false; + } + + bool ready = true; + + /* + * the query will be stopped ASAP, since the state of meter is set to TSDB_METER_STATE_DELETING, + * and new query will abort since the meter is deleted. + */ + pthread_mutex_lock(&pVnode->vmutex); + if (pObj->numOfQueries > 0) { + dWarn("vid:%d sid:%d id:%s %d queries executing on it, wait query to be finished", + pObj->vnode, pObj->sid, pObj->meterId, pObj->numOfQueries); + ready = false; + } + pthread_mutex_unlock(&pVnode->vmutex); + + return ready; +} diff --git a/src/util/src/textbuffer.c b/src/util/src/textbuffer.c index 060ed82fbaace07d611214d31e69815d205dbcc4..7c283c0f38659599a40a6561fa08a5a10d49c2e0 100644 --- a/src/util/src/textbuffer.c +++ b/src/util/src/textbuffer.c @@ -1532,7 +1532,7 @@ void tColModelDisplayEx(tColModel *pModel, void *pData, int32_t numOfRows, int32 } //////////////////////////////////////////////////////////////////////////////////////////// -void tColModelCompress(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity) { +void tColModelCompact(tColModel *pModel, tFilePage *inputBuffer, int32_t maxElemsCapacity) { if (inputBuffer->numOfElems == 0 || maxElemsCapacity == inputBuffer->numOfElems) { return; } diff --git a/src/util/src/tinterpolation.c b/src/util/src/tinterpolation.c index d036989410ac82f14729a5d9d7033d620b117b61..12f3064d7a22204d4a4765b0eda87e2c391d99ff 100644 --- a/src/util/src/tinterpolation.c +++ b/src/util/src/tinterpolation.c @@ -117,7 +117,7 @@ int32_t taosGetNumOfResWithoutLimit(SInterpolationInfo* pInterpoInfo, int64_t* p } } -bool taosHasNoneInterpoPoints(SInterpolationInfo* pInterpoInfo) { return taosNumOfRemainPoints(pInterpoInfo) > 0; } +bool taosHasRemainsDataForInterpolation(SInterpolationInfo* pInterpoInfo) { return taosNumOfRemainPoints(pInterpoInfo) > 0; } int32_t taosNumOfRemainPoints(SInterpolationInfo* pInterpoInfo) { if (pInterpoInfo->rowIdx == -1 || pInterpoInfo->numOfRawDataInRows == 0) {