提交 9d013058 编写于 作者: X xywang

[TD-13875]<fix>: refactored parseSmlValue function & updated test cases

上级 64c16a14
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#include "taos.h" #include "taos.h"
#include "tscParseLine.h" #include "tscParseLine.h"
#include "cJSON.h"
typedef struct { typedef struct {
char sTableName[TSDB_TABLE_NAME_LEN + TS_BACKQUOTE_CHAR_SIZE]; char sTableName[TSDB_TABLE_NAME_LEN + TS_BACKQUOTE_CHAR_SIZE];
...@@ -2141,145 +2140,331 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash ...@@ -2141,145 +2140,331 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash
static int32_t parseSmlValue(TAOS_SML_KV *pKV, const char **index, static int32_t parseSmlValue(TAOS_SML_KV *pKV, const char **index,
bool *is_last_kv, SSmlLinesInfo* info, bool isTag) { bool *is_last_kv, SSmlLinesInfo* info, bool isTag) {
const char *start, *cur, *tmp; const char *start, *cur;
int32_t ret = TSDB_CODE_SUCCESS; int32_t ret = TSDB_CODE_SUCCESS;
int32_t braces = 0; char *value = NULL;
const char *json_start = NULL, *json_end = NULL; int16_t len = 0;
cJSON *json = NULL;
char *value = NULL, *json_tmp, *json_cur; bool kv_done = false;
int16_t len = 0; bool back_slash = false;
bool searchQuote = false; bool double_quote = false;
start = cur = *index; size_t line_len = 0;
enum {
tag_common,
tag_lqoute,
tag_rqoute
} tag_state;
enum {
val_common,
val_lqoute,
val_rqoute
} val_state;
//if field value is string start = cur = *index;
if (!isTag) { tag_state = tag_common;
if (*cur == '"') { val_state = val_common;
searchQuote = true;
cur += 1;
len += 1;
} else if (*cur == 'L' && *(cur + 1) == '"') {
searchQuote = true;
cur += 2;
len += 2;
}
}
while (1) { while (1) {
if (*cur == '{') { if (isTag) {
if (len == 0 || (len == 1 && cur[len - 1] != '"') || (len == 2 && cur[len - 1] != '"' && cur[len - 2] != 'L')) { /* ',', '=' and spaces MUST be escaped */
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; switch (tag_state) {
goto error; case tag_common:
} if (back_slash == true) {
if (*cur != ',' && *cur != '=' && *cur != ' ') {
tscError("SML:0x%"PRIx64" tag value: state(%d), incorrect character(%c) escaped", info->id, tag_state, *cur);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error;
}
json_start = cur; back_slash = false;
cur++;
len++;
break;
}
if (*cur == '"') {
if (cur == *index) {
tag_state = tag_lqoute;
}
cur += 1;
len += 1;
break;
} else if (*cur == 'L') {
line_len = strlen(*index);
/* common character at the end */
if (cur + 1 >= *index + line_len) {
*is_last_kv = true;
kv_done = true;
break;
}
if (*(cur + 1) == '"') {
/* string starts here */
if (cur + 1 == *index + 1) {
tag_state = tag_lqoute;
}
cur += 2;
len += 2;
break;
}
}
switch (*cur) {
case '\\':
back_slash = true;
cur++;
len++;
break;
case ',':
kv_done = true;
break;
case ' ':
/* fall through */
case '\0':
*is_last_kv = true;
kv_done = true;
break;
default:
cur++;
len++;
}
braces++; break;
cur++; case tag_lqoute:
len++; if (back_slash == true) {
if (*cur != ',' && *cur != '=' && *cur != ' ') {
tscError("SML:0x%"PRIx64" tag value: state(%d), incorrect character(%c) escaped", info->id, tag_state, *cur);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error;
}
while (*cur != '\0') { back_slash = false;
if (*cur == '{') { cur++;
if (*(cur - 1) == '{') { len++;
break;
} else if (double_quote == true) {
if (*cur != ' ' && *cur != ',' && *cur != '\0') {
tscError("SML:0x%"PRIx64" tag value: state(%d), incorrect character(%c) behind closing \"", info->id, tag_state, *cur);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error; goto error;
} }
braces++; if (*cur == ' ' || *cur == '\0') {
*is_last_kv = true;
}
double_quote = false;
tag_state = tag_rqoute;
break;
}
switch (*cur) {
case '\\':
back_slash = true;
cur++;
len++;
break;
case '"':
double_quote = true;
cur++;
len++;
break;
case ',':
/* fall through */
case '=':
/* fall through */
case ' ':
if (*(cur - 1) != '\\') {
tscError("SML:0x%"PRIx64" tag value: state(%d), character(%c) not escaped", info->id, tag_state, *cur);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
kv_done = true;
}
break;
case '\0':
tscError("SML:0x%"PRIx64" tag value: state(%d), closing \" not found", info->id, tag_state);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
kv_done = true;
break;
default:
cur++;
len++;
} }
if (*cur == '}') { break;
if (*(cur - 1) == '}') {
default:
kv_done = true;
}
} else {
switch (val_state) {
case val_common:
if (back_slash == true) {
if (*cur != '\\' && *cur != '"') {
tscError("SML:0x%"PRIx64" field value: state(%d), incorrect character(%c) escaped", info->id, val_state, *cur);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error; goto error;
} }
braces--; back_slash = false;
cur++;
len++;
break;
} }
cur++; if (*cur == '"') {
len++; if (cur == *index) {
val_state = val_lqoute;
} else {
if (*(cur - 1) != '\\') {
tscError("SML:0x%"PRIx64" field value: state(%d), \" not escaped", info->id, val_state);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error;
}
}
if (braces == 0) { cur += 1;
len += 1;
break; break;
} else if (*cur == 'L') {
line_len = strlen(*index);
/* common character at the end */
if (cur + 1 >= *index + line_len) {
*is_last_kv = true;
kv_done = true;
break;
}
if (*(cur + 1) == '"') {
/* string starts here */
if (cur + 1 == *index + 1) {
val_state = val_lqoute;
cur += 2;
len += 2;
} else {
/* MUST at the end of string */
if (cur + 2 >= *index + line_len) {
cur += 2;
len += 2;
*is_last_kv = true;
kv_done = true;
} else {
if (*(cur + 2) != ' ' && *(cur + 2) != ',') {
tscError("SML:0x%"PRIx64" field value: state(%d), not closing character(L\")", info->id, val_state);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error;
} else {
if (*(cur + 2) == ' ') {
*is_last_kv = true;
}
cur += 2;
len += 2;
kv_done = true;
}
}
}
break;
}
} }
}
if (braces != 0) { switch (*cur) {
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; case '\\':
goto error; back_slash = true;
} cur++;
len++;
break;
json_end = cur; case ',':
kv_done = true;
break;
value = calloc(1, json_end - json_start + 1); case ' ':
if (value == NULL) { /* fall through */
tscError("SML:0x%"PRIx64" Failed allocte for json sml type value", info->id); case '\0':
ret = TSDB_CODE_TSC_OUT_OF_MEMORY; *is_last_kv = true;
goto error; kv_done = true;
} break;
//check "\\," default:
memcpy(value, json_start, json_end - json_start); cur++;
json_tmp = json_cur = value; len++;
while (json_tmp[1] != '\0') { }
if (json_tmp[0] == '\\' && json_tmp[1] == ',') {
json_tmp++;
}
*json_cur++ = *json_tmp++; break;
} case val_lqoute:
if (back_slash == true) {
if (*cur != '\\' && *cur != '"') {
tscError("SML:0x%"PRIx64" field value: state(%d), incorrect character(%c) escaped", info->id, val_state, *cur);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error;
}
back_slash = false;
cur++;
len++;
break;
} else if (double_quote == true) {
if (*cur != ' ' && *cur != ',' && *cur != '\0') {
tscError("SML:0x%"PRIx64" field value: state(%d), incorrect character(%c) behind closing \"", info->id, val_state, *cur);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error;
}
*json_cur++ = *json_tmp++; if (*cur == ' ' || *cur == '\0') {
*json_cur = '\0'; *is_last_kv = true;
}
//check if json is valid double_quote = false;
json = cJSON_Parse(value); val_state = val_rqoute;
if (json == NULL) { break;
free(value); }
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error;
}
free(value); switch (*cur) {
} case '\\':
back_slash = true;
cur++;
len++;
break;
// unescaped ',' or ' ' or '\0' identifies a value case '"':
if (((*cur == ',' || *cur == ' ' ) && *(cur - 1) != '\\') || *cur == '\0') { double_quote = true;
if (searchQuote == true) { cur++;
//first quote ignored while searching len++;
if (*(cur - 1) == '"' && len != 1 && len != 2) {
*is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false;
break; break;
} else if (*cur == '\0') {
case '\0':
tscError("SML:0x%"PRIx64" field value: state(%d), closing \" not found", info->id, val_state);
ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
goto error; kv_done = true;
} else { break;
default:
cur++; cur++;
len++; len++;
continue;
} }
}
//unescaped ' ' or '\0' indicates end of value
*is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false;
if (*cur == ' ' && *(cur + 1) == ' ') {
cur++;
continue;
} else {
break; break;
default:
kv_done = true;
} }
} }
//Escape special character
if (*cur == '\\') { if (kv_done == true) {
tmp = cur; break;
escapeSpecialCharacter(isTag ? 2 : 3, &cur);
if (tmp != cur) {
continue;
}
} }
cur++;
len++;
} }
if (len == 0) {
if (len == 0 || ret != TSDB_CODE_SUCCESS) {
free(pKV->key); free(pKV->key);
pKV->key = NULL; pKV->key = NULL;
return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; return TSDB_CODE_TSC_LINE_SYNTAX_ERROR;
......
...@@ -32,9 +32,9 @@ class TDTestCase: ...@@ -32,9 +32,9 @@ class TDTestCase:
tdSql.execute('create stable ste(ts timestamp, f int) tags(t1 bigint)') tdSql.execute('create stable ste(ts timestamp, f int) tags(t1 bigint)')
lines = [ "st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"\"\"a pa,\"s si,t \"\"\",c2=false,c4=4f64 1626006833639000000", lines = [ "st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"\\\"\\\"a pa,\\\"s si,t \\\"\\\"\",c2=false,c4=4f64 1626006833639000000",
"st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000", "st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000",
"ste,t2=5f64,t3=L\"ste\" c1=true,c2=4i64,c3=\" i,\"a \"m,\"\"\" 1626056811823316532", "ste,t2=5f64,t3=L\"ste\" c1=true,c2=4i64,c3=\" i,\\\"a \\\"m,\\\"\\\"\" 1626056811823316532",
"stf,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=7u64 1626006933640000000", "stf,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=7u64 1626006933640000000",
"st,t1=4i64,t2=5f64,t3=\"t4\" c1=3i64,c3=L\"passitagain\",c2=true,c4=5f64 1626006833642000000", "st,t1=4i64,t2=5f64,t3=\"t4\" c1=3i64,c3=L\"passitagain\",c2=true,c4=5f64 1626006833642000000",
"ste,t2=5f64,t3=L\"ste2\" c3=\"iamszhou\",c4=false 1626056811843316532", "ste,t2=5f64,t3=L\"ste2\" c3=\"iamszhou\",c4=false 1626056811843316532",
...@@ -147,7 +147,7 @@ class TDTestCase: ...@@ -147,7 +147,7 @@ class TDTestCase:
tdSql.query('select tbname from str') tdSql.query('select tbname from str')
tdSql.checkRows(3) tdSql.checkRows(3)
###Special Character and keyss ###Special Character and keys
self._conn.schemaless_insert([ self._conn.schemaless_insert([
"1234,id=3456,abc=4i64,def=3i64 123=3i64,int=2i64,bool=false,into=5f64,column=7u64,!@#$.%^&*()=false 1626006933641", "1234,id=3456,abc=4i64,def=3i64 123=3i64,int=2i64,bool=false,into=5f64,column=7u64,!@#$.%^&*()=false 1626006933641",
"int,id=and,123=4i64,smallint=5f64,double=5f64,of=3i64,key=L\"passitagin_stf\",!@#$.%^&*()=false abc=false 1626006933654", "int,id=and,123=4i64,smallint=5f64,double=5f64,of=3i64,key=L\"passitagin_stf\",!@#$.%^&*()=false abc=false 1626006933654",
...@@ -193,6 +193,15 @@ class TDTestCase: ...@@ -193,6 +193,15 @@ class TDTestCase:
#tdSql.query('select * from `create`') #tdSql.query('select * from `create`')
#tdSql.checkRows(1) #tdSql.checkRows(1)
self._conn.schemaless_insert([
"sts,t1=abc,t2=ab\"c,t3=ab\\,c,t4=ab\\=c,t5=ab\\ c c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=\"abc\" 1626006833640000000",
"sts,t1=abc c1=3i64,c2=false,c3=L\"{\\\"date\\\":\\\"2020-01-01 08:00:00.000\\\",\\\"temperature\\\":20}\",c6=\"ab\\\\c\" 1626006833640000000"
], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value)
tdSql.query('select tbname from sts')
tdSql.checkRows(2)
def stop(self): def stop(self):
tdSql.close() tdSql.close()
tdLog.success("%s successfully executed" % __file__) tdLog.success("%s successfully executed" % __file__)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册