提交 693ee662 编写于 作者: weixin_48148422's avatar weixin_48148422

fix several issues in string token parsing

1. the shell should not remove the escape sequences \' and \" in a string.
2. `tsParseTime` should not unescape the next string token (this issue
appears after the first issue was fixed).
3. `value[4] != '-'` in `tsParseTime` crashes in rare cases when `value[4]`
lies in unallocated virtual memory.
4. `operator[x]` and `delimiter[x]` may result in unexpected behavior
because the string is UTF-8 encoded, so `x < 0` can be true.
5. changes the behavior of `tscGetToken` a little: now, unescaped single
quotation marks are allowed in double-quoted strings and unescaped double
quotation marks are allowed in single-quoted strings.
6. minor performance improvements and other improvements.
上级 90e5690d
......@@ -94,16 +94,12 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
int64_t useconds = 0;
char *pTokenEnd = *next;
tscGetToken(pTokenEnd, &token, &tokenlen);
if (tokenlen == 0 && strlen(value) == 0) {
INVALID_SQL_RET_MSG(error, "missing time stamp");
}
if (strncmp(value, "now", 3) == 0 && valuelen == 3) {
if (valuelen == 3 && (strncmp(value, "now", 3) == 0)) {
useconds = taosGetTimestamp(timePrec);
} else if (strncmp(value, "0", 1) == 0 && valuelen == 1) {
} else if (valuelen == 1 && value[0] == '0') {
// do nothing
} else if (value[4] != '-') {
} else if (valuelen <= 4 || value[4] != '-') {
for (int32_t i = 0; i < valuelen; ++i) {
/*
* filter illegal input.
......@@ -126,7 +122,6 @@ int tsParseTime(char *value, int32_t valuelen, int64_t *time, char **next, char
for (int k = valuelen; value[k] != '\0'; k++) {
if (value[k] == ' ' || value[k] == '\t') continue;
if (value[k] == ',') {
*next = pTokenEnd;
*time = useconds;
return 0;
}
......
......@@ -30,7 +30,7 @@ typedef struct SSQLToken {
} SSQLToken;
char *tscGetToken(char *string, char **token, int *tokenLen);
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters);
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters);
/**
* tokenizer for sql string
......
......@@ -111,6 +111,7 @@ TAOS *shellInit(struct arguments *args) {
void shellReplaceCtrlChar(char *str) {
_Bool ctrlOn = false;
char *pstr = NULL;
char quote = 0;
for (pstr = str; *str != '\0'; ++str) {
if (ctrlOn) {
......@@ -131,6 +132,13 @@ void shellReplaceCtrlChar(char *str) {
*pstr = '\\';
pstr++;
break;
case '\'':
case '"':
if (quote) {
*pstr++ = '\\';
*pstr++ = *str;
}
break;
default:
break;
}
......@@ -139,6 +147,11 @@ void shellReplaceCtrlChar(char *str) {
if (*str == '\\') {
ctrlOn = true;
} else {
if (quote == *str) {
quote = 0;
} else if (*str == '\'' || *str == '"') {
quote = *str;
}
*pstr = *str;
pstr++;
}
......
......@@ -23,77 +23,68 @@
#include "shash.h"
#include "tstoken.h"
static char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
static const char operator[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '$', '%', '&', 0, '(', ')', '*', '+',
0, '-', 0, '/', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '<', '=', '>', 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, '[', 0, ']', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '|', 0, 0, 0};
static char delimiter[] = {
static const char delimiter[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ',', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ';', 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Report whether character `c` occurs in the NUL-terminated set `delimiter`.
 *
 * The terminating NUL is never treated as a member of the set, so
 * isCharInDelimiter('\0', ...) is always false.
 *
 * The set is walked once by pointer; the previous form re-evaluated
 * strlen() on every iteration, making the scan quadratic in the set length.
 */
bool isCharInDelimiter(char c, char *delimiter) {
  for (const char *p = delimiter; *p != '\0'; ++p) {
    if (*p == c) {
      return true;
    }
  }
  return false;
}
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, char *delimiters) {
while (*string != 0) {
if (isCharInDelimiter(*string, delimiters)) {
char *tscGetTokenDelimiter(char *string, char **token, int *tokenLen, const char *delimiters) {
while ((*string != 0) && strchr(delimiters, *string)) {
++string;
} else {
break;
}
}
*token = string;
char *str = string;
*tokenLen = 0;
while (*str != 0) {
if (!isCharInDelimiter(*str, delimiters)) {
*tokenLen = *tokenLen + 1;
str++;
} else {
break;
}
while ((*str != 0) && (strchr(delimiters, *str) == NULL)) {
++str;
}
*tokenLen = str - string;
return string;
}
/*
 * Report whether `c` is an operator character according to the file-scope
 * `operator` lookup table.
 *
 * The byte is indexed as unsigned char and bounds-checked against the table
 * size. A plain `c < 0` test only rejects non-ASCII (e.g. UTF-8) bytes on
 * platforms where `char` is signed; where `char` is unsigned, such bytes
 * would index past the end of the table.
 */
static bool isOperator(char c) {
  unsigned char idx = (unsigned char)c;
  if (idx >= sizeof(operator) / sizeof(operator[0])) {
    return false;
  }
  return operator[idx] != 0;
}
/*
 * Report whether `c` is a token delimiter according to the file-scope
 * `delimiter` lookup table.
 *
 * The byte is indexed as unsigned char and bounds-checked against the table
 * size. A plain `c < 0` test only rejects non-ASCII (e.g. UTF-8) bytes on
 * platforms where `char` is signed; where `char` is unsigned, such bytes
 * would index past the end of the table.
 */
static bool isDelimiter(char c) {
  unsigned char idx = (unsigned char)c;
  if (idx >= sizeof(delimiter) / sizeof(delimiter[0])) {
    return false;
  }
  return delimiter[idx] != 0;
}
char *tscGetToken(char *string, char **token, int *tokenLen) {
char quote = 0;
while (*string != 0) {
if (delimiter[*string]) {
if (isDelimiter(*string)) {
++string;
} else {
break;
}
}
char quotaChar = 0;
if (*string == '\'' || *string == '\"') {
quote = 1;
quotaChar = *string;
quote = *string;
string++;
}
*token = string;
/* not in string, return token */
if (*string > 0 && operator[*string] && quote == 0) {
if (quote == 0 && isOperator(*string)) {
string++;
/* handle the case: insert into tabx using stable1 tags(-1)/tags(+1)
* values(....) */
if (operator[*string] &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
if (isOperator(*string) &&(*string != '(' && *string != ')' && *string != '-' && *string != '+'))
*tokenLen = 2;
else
*tokenLen = 1;
......@@ -102,28 +93,24 @@ char *tscGetToken(char *string, char **token, int *tokenLen) {
while (*string != 0) {
if (quote) {
// handle escape situation: '\"', the " should not be eliminated
if (*string == quotaChar) {
if (*(string - 1) != '\\') {
break;
} else {
if (*string == '\'' || *string == '"') {
// handle escape situation, " and ' should not be eliminated
if (*(string - 1) == '\\') {
shiftStr(string - 1, string);
continue;
} else if (*string == quote) {
break;
}
} else {
++string;
}
} else {
if (delimiter[*string]) break;
if (operator[*string]) break;
++string;
} else if (isDelimiter(*string) || isOperator(*string)) {
break;
}
++string;
}
*tokenLen = (int)(string - *token);
if (quotaChar != 0 && *string != 0 && *(string + 1) != 0) {
if (quote && *string != 0) {
return string + 1;
} else {
return string;
......@@ -135,7 +122,7 @@ void shiftStr(char *dst, char *src) {
do {
dst[i] = src[i];
i++;
} while (delimiter[src[i]] == 0);
} while (!isDelimiter(src[i]));
src[i - 1] = ' ';
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册