diff --git a/src/client/src/tscParseLineProtocol.c b/src/client/src/tscParseLineProtocol.c index 5d1173e8cf3859403c765a514c07120cb0f18ba7..e45c1bc956a9c55c316141423c81c726df696b39 100644 --- a/src/client/src/tscParseLineProtocol.c +++ b/src/client/src/tscParseLineProtocol.c @@ -2140,64 +2140,331 @@ static int32_t parseSmlKey(TAOS_SML_KV *pKV, const char **index, SHashObj *pHash static int32_t parseSmlValue(TAOS_SML_KV *pKV, const char **index, bool *is_last_kv, SSmlLinesInfo* info, bool isTag) { - const char *start, *cur, *tmp; - int32_t ret = TSDB_CODE_SUCCESS; - char *value = NULL; - int16_t len = 0; - bool searchQuote = false; - start = cur = *index; + const char *start, *cur; + int32_t ret = TSDB_CODE_SUCCESS; + char *value = NULL; + int16_t len = 0; + + bool kv_done = false; + bool back_slash = false; + bool double_quote = false; + size_t line_len = 0; + + enum { + tag_common, + tag_lqoute, + tag_rqoute + } tag_state; + + enum { + val_common, + val_lqoute, + val_rqoute + } val_state; - //if field value is string - if (!isTag) { - if (*cur == '"') { - searchQuote = true; - cur += 1; - len += 1; - } else if (*cur == 'L' && *(cur + 1) == '"') { - searchQuote = true; - cur += 2; - len += 2; - } - } + start = cur = *index; + tag_state = tag_common; + val_state = val_common; while (1) { - // unescaped ',' or ' ' or '\0' identifies a value - if (((*cur == ',' || *cur == ' ' ) && *(cur - 1) != '\\') || *cur == '\0') { - if (searchQuote == true) { - //first quote ignored while searching - if (*(cur - 1) == '"' && len != 1 && len != 2) { - *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false; + if (isTag) { + /* ',', '=' and spaces MUST be escaped */ + switch (tag_state) { + case tag_common: + if (back_slash == true) { + if (*cur != ',' && *cur != '=' && *cur != ' ') { + tscError("SML:0x%"PRIx64" tag value: state(%d), incorrect character(%c) escaped", info->id, tag_state, *cur); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } + + back_slash = false; + cur++; + len++; + break; + } + + if (*cur == '"') { + if (cur == *index) { + tag_state = tag_lqoute; + } + cur += 1; + len += 1; + break; + } else if (*cur == 'L') { + line_len = strlen(*index); + + /* common character at the end */ + if (cur + 1 >= *index + line_len) { + *is_last_kv = true; + kv_done = true; + break; + } + + if (*(cur + 1) == '"') { + /* string starts here */ + if (cur + 1 == *index + 1) { + tag_state = tag_lqoute; + } + cur += 2; + len += 2; + break; + } + } + + switch (*cur) { + case '\\': + back_slash = true; + cur++; + len++; + break; + case ',': + kv_done = true; + break; + + case ' ': + /* fall through */ + case '\0': + *is_last_kv = true; + kv_done = true; break; - } else if (*cur == '\0') { + + default: + cur++; + len++; + } + + break; + case tag_lqoute: + if (back_slash == true) { + if (*cur != ',' && *cur != '=' && *cur != ' ') { + tscError("SML:0x%"PRIx64" tag value: state(%d), incorrect character(%c) escaped", info->id, tag_state, *cur); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } + + back_slash = false; + cur++; + len++; + break; + } else if (double_quote == true) { + if (*cur != ' ' && *cur != ',' && *cur != '\0') { + tscError("SML:0x%"PRIx64" tag value: state(%d), incorrect character(%c) behind closing \"", info->id, tag_state, *cur); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } + + if (*cur == ' ' || *cur == '\0') { + *is_last_kv = true; + } + + double_quote = false; + tag_state = tag_rqoute; + break; + } + + switch (*cur) { + case '\\': + back_slash = true; + cur++; + len++; + break; + + case '"': + double_quote = true; + cur++; + len++; + break; + + case ',': + /* fall through */ + case '=': + /* fall through */ + case ' ': + if (*(cur - 1) != '\\') { + tscError("SML:0x%"PRIx64" tag value: state(%d), character(%c) not escaped", info->id, tag_state, *cur); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + kv_done = true; + } + break; + + case '\0': + tscError("SML:0x%"PRIx64" tag value: state(%d), closing \" not found", info->id, tag_state); ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; - goto error; - } else { + kv_done = true; + break; + + default: cur++; len++; - continue; } + + break; + + default: + kv_done = true; } - //unescaped ' ' or '\0' indicates end of value - *is_last_kv = (*cur == ' ' || *cur == '\0') ? true : false; - if (*cur == ' ' && *(cur + 1) == ' ') { - cur++; - continue; - } else { + } else { + switch (val_state) { + case val_common: + if (back_slash == true) { + if (*cur != '\\' && *cur != '"') { + tscError("SML:0x%"PRIx64" field value: state(%d), incorrect character(%c) escaped", info->id, val_state, *cur); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } + + back_slash = false; + cur++; + len++; + break; + } + + if (*cur == '"') { + if (cur == *index) { + val_state = val_lqoute; + } else { + if (*(cur - 1) != '\\') { + tscError("SML:0x%"PRIx64" field value: state(%d), \" not escaped", info->id, val_state); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } + } + + cur += 1; + len += 1; + break; + } else if (*cur == 'L') { + line_len = strlen(*index); + + /* common character at the end */ + if (cur + 1 >= *index + line_len) { + *is_last_kv = true; + kv_done = true; + break; + } + + if (*(cur + 1) == '"') { + /* string starts here */ + if (cur + 1 == *index + 1) { + val_state = val_lqoute; + cur += 2; + len += 2; + } else { + /* MUST at the end of string */ + if (cur + 2 >= *index + line_len) { + cur += 2; + len += 2; + *is_last_kv = true; + kv_done = true; + } else { + if (*(cur + 2) != ' ' && *(cur + 2) != ',') { + tscError("SML:0x%"PRIx64" field value: state(%d), not closing character(L\")", info->id, val_state); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } else { + if (*(cur + 2) == ' ') { + *is_last_kv = true; + } + + cur += 2; + len += 2; + kv_done = true; + } + } + } + break; + } + } + + switch (*cur) { + case '\\': + back_slash = true; + cur++; + len++; + break; + + case ',': + kv_done = true; + break; + + case ' ': + /* fall through */ + case '\0': + *is_last_kv = true; + kv_done = true; + break; + + default: + cur++; + len++; + } + break; + case val_lqoute: + if (back_slash == true) { + if (*cur != '\\' && *cur != '"') { + tscError("SML:0x%"PRIx64" field value: state(%d), incorrect character(%c) escaped", info->id, val_state, *cur); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } + + back_slash = false; + cur++; + len++; + break; + } else if (double_quote == true) { + if (*cur != ' ' && *cur != ',' && *cur != '\0') { + tscError("SML:0x%"PRIx64" field value: state(%d), incorrect character(%c) behind closing \"", info->id, val_state, *cur); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + goto error; + } + + if (*cur == ' ' || *cur == '\0') { + *is_last_kv = true; + } + + double_quote = false; + val_state = val_rqoute; + break; + } + + switch (*cur) { + case '\\': + back_slash = true; + cur++; + len++; + break; + + case '"': + double_quote = true; + cur++; + len++; + break; + + case '\0': + tscError("SML:0x%"PRIx64" field value: state(%d), closing \" not found", info->id, val_state); + ret = TSDB_CODE_TSC_LINE_SYNTAX_ERROR; + kv_done = true; + break; + + default: + cur++; + len++; + } + + break; + default: + kv_done = true; } } - //Escape special character - if (*cur == '\\') { - tmp = cur; - escapeSpecialCharacter(isTag ? 2 : 3, &cur); - if (tmp != cur) { - continue; - } + + if (kv_done == true) { + break; } - cur++; - len++; } - if (len == 0) { + + if (len == 0 || ret != TSDB_CODE_SUCCESS) { free(pKV->key); pKV->key = NULL; return TSDB_CODE_TSC_LINE_SYNTAX_ERROR; diff --git a/tests/pytest/insert/line_insert.py b/tests/pytest/insert/line_insert.py index d95df3a8491f73f7279e583afd446a7182adf823..4c873ec4a4efa6d1877a83d5b3942f5eb990a714 100644 --- a/tests/pytest/insert/line_insert.py +++ b/tests/pytest/insert/line_insert.py @@ -32,9 +32,9 @@ class TDTestCase: tdSql.execute('create stable ste(ts timestamp, f int) tags(t1 bigint)') - lines = [ "st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"\"\"a pa,\"s si,t \"\"\",c2=false,c4=4f64 1626006833639000000", + lines = [ "st,t1=3i64,t2=4f64,t3=\"t3\" c1=3i64,c3=L\"\\\"\\\"a pa,\\\"s si,t \\\"\\\"\",c2=false,c4=4f64 1626006833639000000", "st,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64 1626006833640000000", - "ste,t2=5f64,t3=L\"ste\" c1=true,c2=4i64,c3=\" i,\"a \"m,\"\"\" 1626056811823316532", + "ste,t2=5f64,t3=L\"ste\" c1=true,c2=4i64,c3=\" i,\\\"a \\\"m,\\\"\\\"\" 1626056811823316532", "stf,t1=4i64,t3=\"t4\",t2=5f64,t4=5f64 c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=7u64 1626006933640000000", "st,t1=4i64,t2=5f64,t3=\"t4\" c1=3i64,c3=L\"passitagain\",c2=true,c4=5f64 1626006833642000000", "ste,t2=5f64,t3=L\"ste2\" c3=\"iamszhou\",c4=false 1626056811843316532", @@ -147,7 +147,7 @@ class TDTestCase: tdSql.query('select tbname from str') tdSql.checkRows(3) - ###Special Character and keyss + ###Special Character and keys self._conn.schemaless_insert([ "1234,id=3456,abc=4i64,def=3i64 123=3i64,int=2i64,bool=false,into=5f64,column=7u64,!@#$.%^&*()=false 1626006933641", "int,id=and,123=4i64,smallint=5f64,double=5f64,of=3i64,key=L\"passitagin_stf\",!@#$.%^&*()=false abc=false 1626006933654", @@ -193,6 +193,15 @@ class TDTestCase: #tdSql.query('select * from `create`') #tdSql.checkRows(1) + + self._conn.schemaless_insert([ + "sts,t1=abc,t2=ab\"c,t3=ab\\,c,t4=ab\\=c,t5=ab\\ c c1=3i64,c3=L\"passitagin\",c2=true,c4=5f64,c5=5f64,c6=\"abc\" 1626006833640000000", + "sts,t1=abc c1=3i64,c2=false,c3=L\"{\\\"date\\\":\\\"2020-01-01 08:00:00.000\\\",\\\"temperature\\\":20}\",c6=\"ab\\\\c\" 1626006833640000000" + ], TDSmlProtocolType.LINE.value, TDSmlTimestampType.NANO_SECOND.value) + + tdSql.query('select tbname from sts') + tdSql.checkRows(2) + def stop(self): tdSql.close() tdLog.success("%s successfully executed" % __file__)