parTokenizer.c 21.3 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table.
/*
 * One entry of the keyword table.
 *
 * keywordTable uses positional initializers of the form {name, type}, so the
 * order of the first two fields must not change. `len` is left zero by those
 * initializers and is filled in by doInitKeywordsTable().
 */
typedef struct SKeyword {
  const char* name;  // keyword text as written in keywordTable (upper case)
  uint16_t    type;  // token id (a TK_* constant)
  uint8_t     len;   // strlen(name), computed when the hash table is built
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
    {"ACCOUNT",              TK_ACCOUNT},
    {"ACCOUNTS",             TK_ACCOUNTS},
    {"ADD",                  TK_ADD},
    {"AGGREGATE",            TK_AGGREGATE},
    {"ALL",                  TK_ALL},
    {"ALTER",                TK_ALTER},
    {"ANALYZE",              TK_ANALYZE},
    {"AND",                  TK_AND},
    {"APPS",                 TK_APPS},
    {"AS",                   TK_AS},
    {"ASC",                  TK_ASC},
    {"AT_ONCE",              TK_AT_ONCE},
    {"BALANCE",              TK_BALANCE},
    {"BETWEEN",              TK_BETWEEN},
    {"BIGINT",               TK_BIGINT},
X
Xiaoyu Wang 已提交
47
    {"BINARY",               TK_BINARY},
X
Xiaoyu Wang 已提交
48 49 50 51 52 53 54 55 56
    {"BNODE",                TK_BNODE},
    {"BNODES",               TK_BNODES},
    {"BOOL",                 TK_BOOL},
    {"BUFFER",               TK_BUFFER},
    {"BUFSIZE",              TK_BUFSIZE},
    {"BY",                   TK_BY},
    {"CACHE",                TK_CACHE},
    {"CACHEMODEL",           TK_CACHEMODEL},
    {"CACHESIZE",            TK_CACHESIZE},
X
Xiaoyu Wang 已提交
57
    {"CASE",                 TK_CASE},
X
Xiaoyu Wang 已提交
58 59 60 61 62 63
    {"CAST",                 TK_CAST},
    {"CLIENT_VERSION",       TK_CLIENT_VERSION},
    {"CLUSTER",              TK_CLUSTER},
    {"COLUMN",               TK_COLUMN},
    {"COMMENT",              TK_COMMENT},
    {"COMP",                 TK_COMP},
X
Xiaoyu Wang 已提交
64
    {"COMPACT",              TK_COMPACT},
X
Xiaoyu Wang 已提交
65 66
    {"CONNECTION",           TK_CONNECTION},
    {"CONNECTIONS",          TK_CONNECTIONS},
X
Xiaoyu Wang 已提交
67
    {"CONNS",                TK_CONNS},
X
Xiaoyu Wang 已提交
68 69 70 71 72 73 74 75 76 77
    {"CONSUMER",             TK_CONSUMER},
    {"CONSUMERS",            TK_CONSUMERS},
    {"CONTAINS",             TK_CONTAINS},
    {"COUNT",                TK_COUNT},
    {"CREATE",               TK_CREATE},
    {"CURRENT_USER",         TK_CURRENT_USER},
    {"DATABASE",             TK_DATABASE},
    {"DATABASES",            TK_DATABASES},
    {"DBS",                  TK_DBS},
    {"DELETE",               TK_DELETE},
78
    {"DELETE_MARK",          TK_DELETE_MARK},
X
Xiaoyu Wang 已提交
79 80 81 82 83 84 85 86 87
    {"DESC",                 TK_DESC},
    {"DESCRIBE",             TK_DESCRIBE},
    {"DISTINCT",             TK_DISTINCT},
    {"DISTRIBUTED",          TK_DISTRIBUTED},
    {"DNODE",                TK_DNODE},
    {"DNODES",               TK_DNODES},
    {"DOUBLE",               TK_DOUBLE},
    {"DROP",                 TK_DROP},
    {"DURATION",             TK_DURATION},
X
Xiaoyu Wang 已提交
88
    {"ELSE",                 TK_ELSE},
X
Xiaoyu Wang 已提交
89
    {"ENABLE",               TK_ENABLE},
X
Xiaoyu Wang 已提交
90
    {"END",                  TK_END},
X
Xiaoyu Wang 已提交
91 92 93
    {"EXISTS",               TK_EXISTS},
    {"EXPIRED",              TK_EXPIRED},
    {"EXPLAIN",              TK_EXPLAIN},
X
Xiaoyu Wang 已提交
94
    {"EVENT_WINDOW",         TK_EVENT_WINDOW},
X
Xiaoyu Wang 已提交
95 96 97
    {"EVERY",                TK_EVERY},
    {"FILE",                 TK_FILE},
    {"FILL",                 TK_FILL},
98
    {"FILL_HISTORY",         TK_FILL_HISTORY},
X
Xiaoyu Wang 已提交
99 100 101 102
    {"FIRST",                TK_FIRST},
    {"FLOAT",                TK_FLOAT},
    {"FLUSH",                TK_FLUSH},
    {"FROM",                 TK_FROM},
103
    {"FORCE",                TK_FORCE},
X
Xiaoyu Wang 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116 117
    {"FUNCTION",             TK_FUNCTION},
    {"FUNCTIONS",            TK_FUNCTIONS},
    {"GRANT",                TK_GRANT},
    {"GRANTS",               TK_GRANTS},
    {"GROUP",                TK_GROUP},
    {"HAVING",               TK_HAVING},
    {"IF",                   TK_IF},
    {"IGNORE",               TK_IGNORE},
    {"IMPORT",               TK_IMPORT},
    {"IN",                   TK_IN},
    {"INDEX",                TK_INDEX},
    {"INDEXES",              TK_INDEXES},
    {"INNER",                TK_INNER},
    {"INSERT",               TK_INSERT},
X
Xiaoyu Wang 已提交
118
    {"INT",                  TK_INT},
X
Xiaoyu Wang 已提交
119 120 121 122 123 124 125 126
    {"INTEGER",              TK_INTEGER},
    {"INTERVAL",             TK_INTERVAL},
    {"INTO",                 TK_INTO},
    {"IS",                   TK_IS},
    {"JOIN",                 TK_JOIN},
    {"JSON",                 TK_JSON},
    {"KEEP",                 TK_KEEP},
    {"KILL",                 TK_KILL},
127
    {"LANGUAGE",             TK_LANGUAGE},
X
Xiaoyu Wang 已提交
128 129
    {"LAST",                 TK_LAST},
    {"LAST_ROW",             TK_LAST_ROW},
130
    {"LEADER",               TK_LEADER},
X
Xiaoyu Wang 已提交
131
    {"LICENCES",             TK_LICENCES},
X
Xiaoyu Wang 已提交
132 133 134 135 136 137 138
    {"LIKE",                 TK_LIKE},
    {"LIMIT",                TK_LIMIT},
    {"LINEAR",               TK_LINEAR},
    {"LOCAL",                TK_LOCAL},
    {"MATCH",                TK_MATCH},
    {"MAXROWS",              TK_MAXROWS},
    {"MAX_DELAY",            TK_MAX_DELAY},
139
    {"MAX_SPEED",            TK_MAX_SPEED},
X
Xiaoyu Wang 已提交
140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
    {"MERGE",                TK_MERGE},
    {"META",                 TK_META},
    {"MINROWS",              TK_MINROWS},
    {"MINUS",                TK_MINUS},
    {"MNODE",                TK_MNODE},
    {"MNODES",               TK_MNODES},
    {"MODIFY",               TK_MODIFY},
    {"MODULES",              TK_MODULES},
    {"NCHAR",                TK_NCHAR},
    {"NEXT",                 TK_NEXT},
    {"NMATCH",               TK_NMATCH},
    {"NONE",                 TK_NONE},
    {"NOT",                  TK_NOT},
    {"NOW",                  TK_NOW},
    {"NULL",                 TK_NULL},
D
dapan1121 已提交
155
    {"NULL_F",               TK_NULL_F},
X
Xiaoyu Wang 已提交
156 157 158 159 160 161 162 163
    {"NULLS",                TK_NULLS},
    {"OFFSET",               TK_OFFSET},
    {"ON",                   TK_ON},
    {"OR",                   TK_OR},
    {"ORDER",                TK_ORDER},
    {"OUTPUTTYPE",           TK_OUTPUTTYPE},
    {"PAGES",                TK_PAGES},
    {"PAGESIZE",             TK_PAGESIZE},
X
Xiaoyu Wang 已提交
164 165
    {"PARTITION",            TK_PARTITION},
    {"PASS",                 TK_PASS},
X
Xiaoyu Wang 已提交
166 167 168 169
    {"PORT",                 TK_PORT},
    {"PPS",                  TK_PPS},
    {"PRECISION",            TK_PRECISION},
    {"PREV",                 TK_PREV},
170
    {"PRIVILEGES",           TK_PRIVILEGES},
X
Xiaoyu Wang 已提交
171 172 173 174 175 176 177 178 179 180
    {"QNODE",                TK_QNODE},
    {"QNODES",               TK_QNODES},
    {"QTIME",                TK_QTIME},
    {"QUERIES",              TK_QUERIES},
    {"QUERY",                TK_QUERY},
    {"RANGE",                TK_RANGE},
    {"RATIO",                TK_RATIO},
    {"READ",                 TK_READ},
    {"REDISTRIBUTE",         TK_REDISTRIBUTE},
    {"RENAME",               TK_RENAME},
181
    {"REPLACE",              TK_REPLACE},
X
Xiaoyu Wang 已提交
182 183
    {"REPLICA",              TK_REPLICA},
    {"RESET",                TK_RESET},
D
dapan1121 已提交
184
    {"RESTORE",              TK_RESTORE},
X
Xiaoyu Wang 已提交
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
    {"RETENTIONS",           TK_RETENTIONS},
    {"REVOKE",               TK_REVOKE},
    {"ROLLUP",               TK_ROLLUP},
    {"SCHEMALESS",           TK_SCHEMALESS},
    {"SCORES",               TK_SCORES},
    {"SELECT",               TK_SELECT},
    {"SERVER_STATUS",        TK_SERVER_STATUS},
    {"SERVER_VERSION",       TK_SERVER_VERSION},
    {"SESSION",              TK_SESSION},
    {"SET",                  TK_SET},
    {"SHOW",                 TK_SHOW},
    {"SINGLE_STABLE",        TK_SINGLE_STABLE},
    {"SLIDING",              TK_SLIDING},
    {"SLIMIT",               TK_SLIMIT},
    {"SMA",                  TK_SMA},
    {"SMALLINT",             TK_SMALLINT},
    {"SNODE",                TK_SNODE},
    {"SNODES",               TK_SNODES},
    {"SOFFSET",              TK_SOFFSET},
    {"SPLIT",                TK_SPLIT},
    {"STABLE",               TK_STABLE},
    {"STABLES",              TK_STABLES},
X
Xiaoyu Wang 已提交
207
    {"START",                TK_START},
X
Xiaoyu Wang 已提交
208 209 210 211 212 213
    {"STATE",                TK_STATE},
    {"STATE_WINDOW",         TK_STATE_WINDOW},
    {"STORAGE",              TK_STORAGE},
    {"STREAM",               TK_STREAM},
    {"STREAMS",              TK_STREAMS},
    {"STRICT",               TK_STRICT},
X
Xiaoyu Wang 已提交
214
    {"STT_TRIGGER",          TK_STT_TRIGGER},
215
    {"SUBSCRIBE",            TK_SUBSCRIBE},
X
Xiaoyu Wang 已提交
216
    {"SUBSCRIPTIONS",        TK_SUBSCRIPTIONS},
217
    {"SUBTABLE",             TK_SUBTABLE},
X
Xiaoyu Wang 已提交
218 219 220
    {"SYSINFO",              TK_SYSINFO},
    {"TABLE",                TK_TABLE},
    {"TABLES",               TK_TABLES},
221 222
    {"TABLE_PREFIX",         TK_TABLE_PREFIX},
    {"TABLE_SUFFIX",         TK_TABLE_SUFFIX},
X
Xiaoyu Wang 已提交
223 224 225
    {"TAG",                  TK_TAG},
    {"TAGS",                 TK_TAGS},
    {"TBNAME",               TK_TBNAME},
X
Xiaoyu Wang 已提交
226
    {"THEN",                 TK_THEN},
X
Xiaoyu Wang 已提交
227 228 229 230 231 232 233 234 235 236 237
    {"TIMESTAMP",            TK_TIMESTAMP},
    {"TIMEZONE",             TK_TIMEZONE},
    {"TINYINT",              TK_TINYINT},
    {"TO",                   TK_TO},
    {"TODAY",                TK_TODAY},
    {"TOPIC",                TK_TOPIC},
    {"TOPICS",               TK_TOPICS},
    {"TRANSACTION",          TK_TRANSACTION},
    {"TRANSACTIONS",         TK_TRANSACTIONS},
    {"TRIGGER",              TK_TRIGGER},
    {"TRIM",                 TK_TRIM},
238
    {"TSDB_PAGESIZE",        TK_TSDB_PAGESIZE},
X
Xiaoyu Wang 已提交
239 240 241 242
    {"TSERIES",              TK_TSERIES},
    {"TTL",                  TK_TTL},
    {"UNION",                TK_UNION},
    {"UNSIGNED",             TK_UNSIGNED},
243
    {"UPDATE",               TK_UPDATE},
X
Xiaoyu Wang 已提交
244 245 246 247 248
    {"USE",                  TK_USE},
    {"USER",                 TK_USER},
    {"USERS",                TK_USERS},
    {"USING",                TK_USING},
    {"VALUE",                TK_VALUE},
D
dapan1121 已提交
249
    {"VALUE_F",              TK_VALUE_F},
X
Xiaoyu Wang 已提交
250 251 252 253 254 255
    {"VALUES",               TK_VALUES},
    {"VARCHAR",              TK_VARCHAR},
    {"VARIABLES",            TK_VARIABLES},
    {"VERBOSE",              TK_VERBOSE},
    {"VGROUP",               TK_VGROUP},
    {"VGROUPS",              TK_VGROUPS},
D
dapan1121 已提交
256
    {"VNODE",                TK_VNODE},
257
    {"VNODES",               TK_VNODES},
X
Xiaoyu Wang 已提交
258 259
    {"WAL_FSYNC_PERIOD",     TK_WAL_FSYNC_PERIOD},
    {"WAL_LEVEL",            TK_WAL_LEVEL},
X
Xiaoyu Wang 已提交
260 261 262 263
    {"WAL_RETENTION_PERIOD", TK_WAL_RETENTION_PERIOD},
    {"WAL_RETENTION_SIZE",   TK_WAL_RETENTION_SIZE},
    {"WAL_ROLL_PERIOD",      TK_WAL_ROLL_PERIOD},
    {"WAL_SEGMENT_SIZE",     TK_WAL_SEGMENT_SIZE},
X
Xiaoyu Wang 已提交
264
    {"WATERMARK",            TK_WATERMARK},
X
Xiaoyu Wang 已提交
265
    {"WHEN",                 TK_WHEN},
X
Xiaoyu Wang 已提交
266 267 268 269 270
    {"WHERE",                TK_WHERE},
    {"WINDOW_CLOSE",         TK_WINDOW_CLOSE},
    {"WITH",                 TK_WITH},
    {"WRITE",                TK_WRITE},
    {"_C0",                  TK_ROWTS},
271
    {"_IROWTS",              TK_IROWTS},
272
    {"_ISFILLED",            TK_ISFILLED},
X
Xiaoyu Wang 已提交
273 274 275 276
    {"_QDURATION",           TK_QDURATION},
    {"_QEND",                TK_QEND},
    {"_QSTART",              TK_QSTART},
    {"_ROWTS",               TK_ROWTS},
277
    {"_TAGS",                TK_QTAGS},
X
Xiaoyu Wang 已提交
278 279 280
    {"_WDURATION",           TK_WDURATION},
    {"_WEND",                TK_WEND},
    {"_WSTART",              TK_WSTART},
281
    {"ALIVE",                TK_ALIVE},
H
hzcheng 已提交
282
};
X
Xiaoyu Wang 已提交
283
// clang-format on
H
hzcheng 已提交
284 285 286 287 288 289 290 291 292 293 294 295 296

/*
 * Byte classification table: isIdChar[c] is 1 when byte c may appear in an
 * unquoted identifier or keyword ('0'-'9', 'A'-'Z', 'a'-'z', '_'), else 0.
 *
 * The table has one slot for every possible byte value so that indexing it
 * with (uint8_t)c is always in bounds. Rows 8x..Fx are not listed and are
 * therefore zero-initialized: non-ASCII bytes are never identifier characters.
 */
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
    /* 8x .. Fx: implicitly 0 */
};

H
Haojun Liao 已提交
297
static void* keywordHashTable = NULL;
H
hzcheng 已提交
298

S
TD-1057  
Shengliang Guan 已提交
299
static void doInitKeywordsTable(void) {
300
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
301

H
Haojun Liao 已提交
302
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
303
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
304
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
305
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
306
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
307
  }
308 309
}

wafwerar's avatar
wafwerar 已提交
310
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
311

312
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
313
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
314

H
hjxilinx 已提交
315
  char key[512] = {0};
X
Xiaoyu Wang 已提交
316
  if (n > tListLen(key)) {  // too long token, can not be any other token type
317
    return TK_NK_ID;
318
  }
X
Xiaoyu Wang 已提交
319

H
hzcheng 已提交
320 321
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
322
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
323 324 325 326 327
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
328
  if (keywordHashTable == NULL) {
329
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
330
  }
H
Haojun Liao 已提交
331

H
Haojun Liao 已提交
332
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
333
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
334 335
}

H
huili 已提交
336
/*
337 338 339
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
340
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
341
  uint32_t i;
H
hzcheng 已提交
342 343 344 345 346 347 348 349
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
350
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
351 352 353
      return i;
    }
    case ':': {
354
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
355 356 357 358 359 360
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
361
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
362
        return i;
363 364 365
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
366
      }
X
Xiaoyu Wang 已提交
367
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
368 369 370
      return 1;
    }
    case '(': {
371
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
372 373 374
      return 1;
    }
    case ')': {
375
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
376 377 378
      return 1;
    }
    case ';': {
379
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
380 381 382
      return 1;
    }
    case '+': {
383
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
384 385 386
      return 1;
    }
    case '*': {
387
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
388 389 390 391
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
392
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
393 394 395 396 397
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
398
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
399 400 401
      return i;
    }
    case '%': {
402
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
403 404 405
      return 1;
    }
    case '=': {
406
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
407 408 409 410
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
411
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
412 413
        return 2;
      } else if (z[1] == '>') {
414
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
415 416
        return 2;
      } else if (z[1] == '<') {
417
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
418 419
        return 2;
      } else {
420
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
421 422 423 424 425
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
426
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
427 428
        return 2;
      } else if (z[1] == '>') {
429
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
430 431
        return 2;
      } else {
432
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
433 434 435 436 437
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
438
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
439 440
        return 2;
      } else {
441
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
442 443 444 445 446
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
447
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
448 449
        return 1;
      } else {
450
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
451 452 453 454
        return 2;
      }
    }
    case ',': {
455
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
456 457 458
      return 1;
    }
    case '&': {
459
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
460 461 462
      return 1;
    }
    case '~': {
463
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
464 465
      return 1;
    }
S
slguan 已提交
466
    case '?': {
467
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
468 469
      return 1;
    }
470
    case '`':
H
hzcheng 已提交
471 472
    case '\'':
    case '"': {
S
slguan 已提交
473 474
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
475
      for (i = 1; z[i]; i++) {
476
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
477 478 479
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
480

481
        if (z[i] == delim) {
H
hzcheng 已提交
482 483 484
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
485
            strEnd = true;
H
hzcheng 已提交
486 487 488 489
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
490

H
hzcheng 已提交
491
      if (z[i]) i++;
H
huili 已提交
492

S
slguan 已提交
493
      if (strEnd) {
X
Xiaoyu Wang 已提交
494
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
495 496
        return i;
      }
H
huili 已提交
497

S
slguan 已提交
498
      break;
H
hzcheng 已提交
499 500
    }
    case '.': {
S
slguan 已提交
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

518
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
519 520
        return i;
      } else {
521
        *tokenId = TK_NK_DOT;
S
slguan 已提交
522 523 524 525 526 527 528
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
529
      if (next == 'b') {  // bin number
530
        *tokenId = TK_NK_BIN;
S
slguan 已提交
531 532 533 534 535 536 537 538
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
539
      } else if (next == 'x') {  // hex number
540
        *tokenId = TK_NK_HEX;
S
slguan 已提交
541 542 543 544 545 546 547 548 549
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
550 551 552 553 554 555 556 557 558 559
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
560
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
561 562 563
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
564
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
565 566 567
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
568
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
569
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
570 571 572 573 574 575 576 577 578 579
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
580
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
581 582 583 584
        seg++;
      }

      if (seg == 4) {  // ip address
585
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
586
        return i;
X
Xiaoyu Wang 已提交
587 588
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
589 590 591 592 593 594 595 596
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
597
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
598 599 600
      }
      return i;
    }
X
Xiaoyu Wang 已提交
601 602 603 604 605 606
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
607 608 609 610
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
611
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
612 613 614
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
615
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
616 617
        return i;
      }
X
Xiaoyu Wang 已提交
618 619
      *tokenId = tKeywordCode(z, i);
      return i;
H
hzcheng 已提交
620 621
    }
    default: {
X
Xiaoyu Wang 已提交
622
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
623 624
        break;
      }
X
Xiaoyu Wang 已提交
625
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
626
      }
627
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
628 629 630 631
      return i;
    }
  }

632
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
633 634 635
  return 0;
}

X
Xiaoyu Wang 已提交
636
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr, bool* pIgnoreComma) {
H
Haojun Liao 已提交
637
  SToken t0 = {0};
S
slguan 已提交
638

H
hzcheng 已提交
639 640
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
641 642
    t0.n = 0;
    return t0;
H
hzcheng 已提交
643 644
  }

645
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
646 647 648
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
649
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
650
    char    t = str[*i];
H
Haojun Liao 已提交
651 652 653 654
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
655
      }
X
Xiaoyu Wang 已提交
656

X
Xiaoyu Wang 已提交
657 658 659 660
      if (NULL != pIgnoreComma && t == ',') {
        *pIgnoreComma = true;
      }

H
Haojun Liao 已提交
661
      t = str[++(*i)];
S
slguan 已提交
662
    }
H
hzcheng 已提交
663

664
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
665
    break;
S
slguan 已提交
666

H
Haojun Liao 已提交
667 668
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
669
    bool ignore = false;
S
slguan 已提交
670 671
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
672
        ignore = true;
S
slguan 已提交
673 674 675 676
        break;
      }
    }

H
Haojun Liao 已提交
677
    if (!ignore) {
S
slguan 已提交
678 679
      break;
    }
H
Haojun Liao 已提交
680
#endif
H
hzcheng 已提交
681 682
  }

683
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
684
    t0.n = 0;
D
dapan1121 已提交
685
    t0.type = 0;
S
slguan 已提交
686 687 688 689 690 691 692 693
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
694
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
695 696

    // only id and string are valid
697
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
698
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
699 700 701 702 703 704 705 706 707
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
708
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
709
      len = tGetToken(&str[*i + t0.n], &type);
710
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
711 712 713
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
714 715 716
    }
  }

X
Xiaoyu Wang 已提交
717
  t0.z = (char*)str + (*i);
S
slguan 已提交
718 719 720
  *i += t0.n;

  return t0;
H
hzcheng 已提交
721 722
}

X
Xiaoyu Wang 已提交
723
// Report whether the first `len` bytes of `z` are classified by tKeywordCode()
// as anything other than a plain identifier (TK_NK_ID).
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
724 725

/*
 * Release the keyword hash table if one exists.
 *
 * The global pointer is swapped to NULL through
 * atomic_val_compare_exchange_ptr() and the table is destroyed only when that
 * call returns the pointer read beforehand, so that (presumably) only one of
 * several concurrent callers frees it.
 *
 * NOTE(review): nothing here resets keywordsHashTableInit, so if
 * taosThreadOnce() has pthread_once semantics the table is not rebuilt after
 * cleanup and tKeywordCode() returns TK_NK_ILLEGAL from then on - confirm this
 * is only called at shutdown.
 */
void taosCleanupKeywordsTable(void) {
  void* pTable = keywordHashTable;
  if (pTable == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, pTable, NULL) == pTable) {
    taosHashCleanup(pTable);
  }
}