/* * Copyright (c) 2019 TAOS Data, Inc. * * This program is free software: you can use, redistribute, and/or modify * it under the terms of the GNU Affero General Public License, version 3 * or later ("AGPL"), as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #include "parUtil.h" #include "cJSON.h" static char* getSyntaxErrFormat(int32_t errCode) { switch (errCode) { case TSDB_CODE_PAR_SYNTAX_ERROR: return "syntax error near \"%s\""; case TSDB_CODE_PAR_INCOMPLETE_SQL: return "Incomplete SQL statement"; case TSDB_CODE_PAR_INVALID_COLUMN: return "Invalid column name: %s"; case TSDB_CODE_PAR_TABLE_NOT_EXIST: return "Table does not exist: %s"; case TSDB_CODE_PAR_AMBIGUOUS_COLUMN: return "Column ambiguously defined: %s"; case TSDB_CODE_PAR_WRONG_VALUE_TYPE: return "Invalid value type: %s"; case TSDB_CODE_PAR_ILLEGAL_USE_AGG_FUNCTION: return "There mustn't be aggregation"; case TSDB_CODE_PAR_WRONG_NUMBER_OF_SELECT: return "ORDER BY item must be the number of a SELECT-list expression"; case TSDB_CODE_PAR_GROUPBY_LACK_EXPRESSION: return "Not a GROUP BY expression"; case TSDB_CODE_PAR_NOT_SELECTED_EXPRESSION: return "Not SELECTed expression"; case TSDB_CODE_PAR_NOT_SINGLE_GROUP: return "Not a single-group group function"; case TSDB_CODE_PAR_TAGS_NOT_MATCHED: return "Tags number not matched"; case TSDB_CODE_PAR_INVALID_TAG_NAME: return "Invalid tag name: %s"; case TSDB_CODE_PAR_NAME_OR_PASSWD_TOO_LONG: return "Name or password too long"; case TSDB_CODE_PAR_PASSWD_EMPTY: return "Password can not be empty"; case TSDB_CODE_PAR_INVALID_PORT: return "Port should be an integer that is less than 65535 and greater than 0"; case TSDB_CODE_PAR_INVALID_ENDPOINT: return "Endpoint should be in the format of 'fqdn:port'"; case TSDB_CODE_PAR_EXPRIE_STATEMENT: return "This statement is no longer supported"; case TSDB_CODE_PAR_INTER_VALUE_TOO_SMALL: return "Interval cannot be less than %d us"; case TSDB_CODE_PAR_DB_NOT_SPECIFIED: return "Database not specified"; case TSDB_CODE_PAR_INVALID_IDENTIFIER_NAME: return "Invalid identifier name: %s"; case TSDB_CODE_PAR_CORRESPONDING_STABLE_ERR: return "Corresponding super table not in this db"; case TSDB_CODE_PAR_INVALID_RANGE_OPTION: return "Invalid option %s: %" PRId64 " valid range: [%d, %d]"; case TSDB_CODE_PAR_INVALID_STR_OPTION: return "Invalid option %s: %s"; case TSDB_CODE_PAR_INVALID_ENUM_OPTION: return "Invalid option %s: %" PRId64 ", only %d, %d allowed"; case TSDB_CODE_PAR_INVALID_KEEP_NUM: return "Invalid number of keep options"; case TSDB_CODE_PAR_INVALID_KEEP_ORDER: return "Invalid keep value, should be keep0 <= keep1 <= keep2"; case TSDB_CODE_PAR_INVALID_KEEP_VALUE: return "Invalid option keep: %d, %d, %d valid range: [%d, %d]"; case TSDB_CODE_PAR_INVALID_COMMENT_OPTION: return "Invalid option comment, length cannot exceed %d"; case TSDB_CODE_PAR_INVALID_F_RANGE_OPTION: return "Invalid option %s: %f valid range: [%d, %d]"; case TSDB_CODE_PAR_INVALID_ROLLUP_OPTION: return "Invalid option rollup: only one function is allowed"; case TSDB_CODE_PAR_INVALID_RETENTIONS_OPTION: return "Invalid option retentions"; case TSDB_CODE_PAR_GROUPBY_WINDOW_COEXIST: return "GROUP BY and WINDOW-clause can't be used together"; case TSDB_CODE_PAR_INVALID_OPTION_UNIT: return "Invalid option %s unit: %c, only m, h, d allowed"; case TSDB_CODE_PAR_INVALID_KEEP_UNIT: return "Invalid option keep unit: %c, only m, h, d allowed"; case TSDB_CODE_PAR_AGG_FUNC_NESTING: return "Aggregate functions do not support nesting"; case TSDB_CODE_PAR_INVALID_STATE_WIN_TYPE: return "Only support STATE_WINDOW on integer/bool/varchar column"; case TSDB_CODE_PAR_INVALID_STATE_WIN_COL: return "Not support STATE_WINDOW on tag column"; case TSDB_CODE_PAR_INVALID_STATE_WIN_TABLE: return "STATE_WINDOW not support for super table query"; case TSDB_CODE_PAR_INTER_SESSION_GAP: return "SESSION gap should be fixed time window, and greater than 0"; case TSDB_CODE_PAR_INTER_SESSION_COL: return "Only support SESSION on primary timestamp column"; case TSDB_CODE_PAR_INTER_OFFSET_NEGATIVE: return "Interval offset cannot be negative"; case TSDB_CODE_PAR_INTER_OFFSET_UNIT: return "Cannot use 'year' as offset when interval is 'month'"; case TSDB_CODE_PAR_INTER_OFFSET_TOO_BIG: return "Interval offset should be shorter than interval"; case TSDB_CODE_PAR_INTER_SLIDING_UNIT: return "Does not support sliding when interval is natural month/year"; case TSDB_CODE_PAR_INTER_SLIDING_TOO_BIG: return "sliding value no larger than the interval value"; case TSDB_CODE_PAR_INTER_SLIDING_TOO_SMALL: return "sliding value can not less than 1% of interval value"; case TSDB_CODE_PAR_ONLY_ONE_JSON_TAG: return "Only one tag if there is a json tag"; case TSDB_CODE_PAR_INCORRECT_NUM_OF_COL: return "Query block has incorrect number of result columns"; case TSDB_CODE_PAR_INCORRECT_TIMESTAMP_VAL: return "Incorrect TIMESTAMP value: %s"; case TSDB_CODE_PAR_INVALID_DAYS_VALUE: return "Invalid days value, should be keep2 >= keep1 >= keep0 >= days"; case TSDB_CODE_PAR_OFFSET_LESS_ZERO: return "soffset/offset can not be less than 0"; case TSDB_CODE_PAR_SLIMIT_LEAK_PARTITION_BY: return "slimit/soffset only available for PARTITION BY query"; case TSDB_CODE_PAR_INVALID_TOPIC_QUERY: return "Invalid topic query"; case TSDB_CODE_PAR_INVALID_DROP_STABLE: return "Cannot drop super table in batch"; case TSDB_CODE_PAR_INVALID_FILL_TIME_RANGE: return "Start(end) time of query range required or time range too large"; case TSDB_CODE_PAR_DUPLICATED_COLUMN: return "Duplicated column names"; case TSDB_CODE_PAR_INVALID_TAGS_LENGTH: return "Tags length exceeds max length %d"; case TSDB_CODE_PAR_INVALID_ROW_LENGTH: return "Row length exceeds max length %d"; case TSDB_CODE_PAR_INVALID_COLUMNS_NUM: return "Illegal number of columns"; case TSDB_CODE_PAR_TOO_MANY_COLUMNS: return "Too many columns"; case TSDB_CODE_PAR_INVALID_FIRST_COLUMN: return "First column must be timestamp"; case TSDB_CODE_PAR_INVALID_VAR_COLUMN_LEN: return "Invalid binary/nchar column length"; case TSDB_CODE_PAR_INVALID_TAGS_NUM: return "Invalid number of tag columns"; case TSDB_CODE_PAR_INVALID_INTERNAL_PK: return "Invalid _c0 or _rowts expression"; case TSDB_CODE_PAR_INVALID_TIMELINE_FUNC: return "Invalid timeline function"; case TSDB_CODE_PAR_INVALID_PASSWD: return "Invalid password"; case TSDB_CODE_PAR_INVALID_ALTER_TABLE: return "Invalid alter table statement"; case TSDB_CODE_PAR_CANNOT_DROP_PRIMARY_KEY: return "Primary timestamp column cannot be dropped"; case TSDB_CODE_PAR_INVALID_MODIFY_COL: return "Only binary/nchar column length could be modified"; case TSDB_CODE_PAR_INVALID_TBNAME: return "Invalid tbname pseudo column"; case TSDB_CODE_OUT_OF_MEMORY: return "Out of memory"; default: return "Unknown error"; } } int32_t generateSyntaxErrMsg(SMsgBuf* pBuf, int32_t errCode, ...) { va_list vArgList; va_start(vArgList, errCode); vsnprintf(pBuf->buf, pBuf->len, getSyntaxErrFormat(errCode), vArgList); va_end(vArgList); return errCode; } int32_t buildInvalidOperationMsg(SMsgBuf* pBuf, const char* msg) { strncpy(pBuf->buf, msg, pBuf->len); return TSDB_CODE_TSC_INVALID_OPERATION; } int32_t buildSyntaxErrMsg(SMsgBuf* pBuf, const char* additionalInfo, const char* sourceStr) { const char* msgFormat1 = "syntax error near \'%s\'"; const char* msgFormat2 = "syntax error near \'%s\' (%s)"; const char* msgFormat3 = "%s"; const char* prefix = "syntax error"; if (sourceStr == NULL) { assert(additionalInfo != NULL); snprintf(pBuf->buf, pBuf->len, msgFormat1, additionalInfo); return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; } char buf[64] = {0}; // only extract part of sql string strncpy(buf, sourceStr, tListLen(buf) - 1); if (additionalInfo != NULL) { snprintf(pBuf->buf, pBuf->len, msgFormat2, buf, additionalInfo); } else { const char* msgFormat = (0 == strncmp(sourceStr, prefix, strlen(prefix))) ? msgFormat3 : msgFormat1; snprintf(pBuf->buf, pBuf->len, msgFormat, buf); } return TSDB_CODE_TSC_SQL_SYNTAX_ERROR; } SSchema* getTableColumnSchema(const STableMeta* pTableMeta) { assert(pTableMeta != NULL); return (SSchema*)pTableMeta->schema; } static SSchema* getOneColumnSchema(const STableMeta* pTableMeta, int32_t colIndex) { assert(pTableMeta != NULL && pTableMeta->schema != NULL && colIndex >= 0 && colIndex < (getNumOfColumns(pTableMeta) + getNumOfTags(pTableMeta))); SSchema* pSchema = (SSchema*)pTableMeta->schema; return &pSchema[colIndex]; } SSchema* getTableTagSchema(const STableMeta* pTableMeta) { assert(pTableMeta != NULL && (pTableMeta->tableType == TSDB_SUPER_TABLE || pTableMeta->tableType == TSDB_CHILD_TABLE)); return getOneColumnSchema(pTableMeta, getTableInfo(pTableMeta).numOfColumns); } int32_t getNumOfColumns(const STableMeta* pTableMeta) { assert(pTableMeta != NULL); // table created according to super table, use data from super table return getTableInfo(pTableMeta).numOfColumns; } int32_t getNumOfTags(const STableMeta* pTableMeta) { assert(pTableMeta != NULL); return getTableInfo(pTableMeta).numOfTags; } STableComInfo getTableInfo(const STableMeta* pTableMeta) { assert(pTableMeta != NULL); return pTableMeta->tableInfo; } static uint32_t getTableMetaSize(const STableMeta* pTableMeta) { int32_t totalCols = 0; if (pTableMeta->tableInfo.numOfColumns >= 0) { totalCols = pTableMeta->tableInfo.numOfColumns + pTableMeta->tableInfo.numOfTags; } return sizeof(STableMeta) + totalCols * sizeof(SSchema); } STableMeta* tableMetaDup(const STableMeta* pTableMeta) { size_t size = getTableMetaSize(pTableMeta); STableMeta* p = taosMemoryMalloc(size); memcpy(p, pTableMeta, size); return p; } int32_t trimString(const char* src, int32_t len, char* dst, int32_t dlen) { if (len <= 0 || dlen <= 0) return 0; char delim = src[0]; int32_t j = 0; for (uint32_t k = 1; k < len - 1; ++k) { if (j >= dlen) { dst[j - 1] = '\0'; return j; } if (src[k] == delim && src[k + 1] == delim) { // deal with "", '' dst[j] = src[k + 1]; j++; k++; continue; } if (src[k] == '\\') { // deal with escape character if (src[k + 1] == 'n') { dst[j] = '\n'; } else if (src[k + 1] == 'r') { dst[j] = '\r'; } else if (src[k + 1] == 't') { dst[j] = '\t'; } else if (src[k + 1] == '\\') { dst[j] = '\\'; } else if (src[k + 1] == '\'') { dst[j] = '\''; } else if (src[k + 1] == '"') { dst[j] = '"'; } else if (src[k + 1] == '%' || src[k + 1] == '_') { dst[j++] = src[k]; dst[j] = src[k + 1]; } else { dst[j] = src[k + 1]; } j++; k++; continue; } dst[j] = src[k]; j++; } dst[j] = '\0'; return j; } static bool isValidateTag(char* input) { if (!input) return false; for (size_t i = 0; i < strlen(input); ++i) { if (isprint(input[i]) == 0) return false; } return true; } int parseJsontoTagData(const char* json, SKVRowBuilder* kvRowBuilder, SMsgBuf* pMsgBuf, int16_t startColId) { // set json NULL data uint8_t jsonNULL = TSDB_DATA_TYPE_NULL; int jsonIndex = startColId + 1; if (!json || strcasecmp(json, TSDB_DATA_NULL_STR_L) == 0) { tdAddColToKVRow(kvRowBuilder, jsonIndex, &jsonNULL, CHAR_BYTES); return TSDB_CODE_SUCCESS; } // set json real data cJSON* root = cJSON_Parse(json); if (root == NULL) { return buildSyntaxErrMsg(pMsgBuf, "json parse error", json); } int size = cJSON_GetArraySize(root); if (!cJSON_IsObject(root)) { return buildSyntaxErrMsg(pMsgBuf, "json error invalide value", json); } int retCode = 0; char* tagKV = NULL; SHashObj* keyHash = taosHashInit(8, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, false); for (int i = 0; i < size; i++) { cJSON* item = cJSON_GetArrayItem(root, i); if (!item) { qError("json inner error:%d", i); retCode = buildSyntaxErrMsg(pMsgBuf, "json inner error", json); goto end; } char* jsonKey = item->string; if (!isValidateTag(jsonKey)) { retCode = buildSyntaxErrMsg(pMsgBuf, "json key not validate", jsonKey); goto end; } // if(strlen(jsonKey) > TSDB_MAX_JSON_KEY_LEN){ // tscError("json key too long error"); // retCode = tscSQLSyntaxErrMsg(errMsg, "json key too long, more than 256", NULL); // goto end; // } size_t keyLen = strlen(jsonKey); if (keyLen == 0 || taosHashGet(keyHash, jsonKey, keyLen) != NULL) { continue; } // key: keyLen + VARSTR_HEADER_SIZE, value type: CHAR_BYTES, value reserved: LONG_BYTES tagKV = taosMemoryCalloc(keyLen + VARSTR_HEADER_SIZE + CHAR_BYTES + LONG_BYTES, 1); if (!tagKV) { retCode = TSDB_CODE_TSC_OUT_OF_MEMORY; goto end; } strncpy(varDataVal(tagKV), jsonKey, keyLen); varDataSetLen(tagKV, keyLen); if (taosHashGetSize(keyHash) == 0) { uint8_t jsonNotNULL = TSDB_DATA_TYPE_JSON; tdAddColToKVRow(kvRowBuilder, jsonIndex++, &jsonNotNULL, CHAR_BYTES); // add json type } taosHashPut(keyHash, jsonKey, keyLen, &keyLen, CHAR_BYTES); // add key to hash to remove dumplicate, value is useless if (item->type == cJSON_String) { // add json value format: type|data char* jsonValue = item->valuestring; int32_t valLen = (int32_t)strlen(jsonValue); int32_t totalLen = keyLen + VARSTR_HEADER_SIZE + valLen * TSDB_NCHAR_SIZE + VARSTR_HEADER_SIZE + CHAR_BYTES; char* tmp = taosMemoryRealloc(tagKV, totalLen); if (!tmp) { retCode = TSDB_CODE_TSC_OUT_OF_MEMORY; goto end; } tagKV = tmp; char* valueType = POINTER_SHIFT(tagKV, keyLen + VARSTR_HEADER_SIZE); char* valueData = POINTER_SHIFT(tagKV, keyLen + VARSTR_HEADER_SIZE + CHAR_BYTES); *valueType = TSDB_DATA_TYPE_NCHAR; if (valLen > 0 && !taosMbsToUcs4(jsonValue, valLen, (TdUcs4*)varDataVal(valueData), (int32_t)(valLen * TSDB_NCHAR_SIZE), &valLen)) { qError("charset:%s to %s. val:%s, errno:%s, convert failed.", DEFAULT_UNICODE_ENCODEC, tsCharset, jsonValue, strerror(errno)); retCode = buildSyntaxErrMsg(pMsgBuf, "charset convert json error", jsonValue); goto end; } varDataSetLen(valueData, valLen); tdAddColToKVRow(kvRowBuilder, jsonIndex++, tagKV, totalLen); } else if (item->type == cJSON_Number) { if (!isfinite(item->valuedouble)) { qError("json value is invalidate"); retCode = buildSyntaxErrMsg(pMsgBuf, "json value number is illegal", json); goto end; } char* valueType = POINTER_SHIFT(tagKV, keyLen + VARSTR_HEADER_SIZE); char* valueData = POINTER_SHIFT(tagKV, keyLen + VARSTR_HEADER_SIZE + CHAR_BYTES); *valueType = (item->valuedouble - (int64_t)(item->valuedouble) == 0) ? TSDB_DATA_TYPE_BIGINT : TSDB_DATA_TYPE_DOUBLE; if (*valueType == TSDB_DATA_TYPE_DOUBLE) *((double*)valueData) = item->valuedouble; else if (*valueType == TSDB_DATA_TYPE_BIGINT) *((int64_t*)valueData) = item->valueint; tdAddColToKVRow(kvRowBuilder, jsonIndex++, tagKV, keyLen + VARSTR_HEADER_SIZE + CHAR_BYTES + LONG_BYTES); } else if (item->type == cJSON_True || item->type == cJSON_False) { char* valueType = POINTER_SHIFT(tagKV, keyLen + VARSTR_HEADER_SIZE); char* valueData = POINTER_SHIFT(tagKV, keyLen + VARSTR_HEADER_SIZE + CHAR_BYTES); *valueType = TSDB_DATA_TYPE_BOOL; *valueData = (char)(item->valueint); tdAddColToKVRow(kvRowBuilder, jsonIndex++, tagKV, keyLen + VARSTR_HEADER_SIZE + CHAR_BYTES + CHAR_BYTES); } else if (item->type == cJSON_NULL) { char* valueType = POINTER_SHIFT(tagKV, keyLen + VARSTR_HEADER_SIZE); *valueType = TSDB_DATA_TYPE_NULL; tdAddColToKVRow(kvRowBuilder, jsonIndex++, tagKV, keyLen + VARSTR_HEADER_SIZE + CHAR_BYTES); } else { retCode = buildSyntaxErrMsg(pMsgBuf, "invalidate json value", json); goto end; } } if (taosHashGetSize(keyHash) == 0) { // set json NULL true tdAddColToKVRow(kvRowBuilder, jsonIndex, &jsonNULL, CHAR_BYTES); } end: taosMemoryFree(tagKV); taosHashCleanup(keyHash); cJSON_Delete(root); return retCode; }