parTokenizer.c 21.1 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One row of the keyword table.
 * Field order matters: keywordTable uses positional initialisers {name, type}.
 */
typedef struct SKeyword {
  const char* name;  // keyword text as written in keywordTable (upper case)
  uint16_t    type;  // TK_* token code returned for this keyword
  uint8_t     len;   // strlen(name); not set by the initialisers, filled in by doInitKeywordsTable
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
    {"ACCOUNT",              TK_ACCOUNT},
    {"ACCOUNTS",             TK_ACCOUNTS},
    {"ADD",                  TK_ADD},
    {"AGGREGATE",            TK_AGGREGATE},
    {"ALL",                  TK_ALL},
    {"ALTER",                TK_ALTER},
    {"ANALYZE",              TK_ANALYZE},
    {"AND",                  TK_AND},
    {"APPS",                 TK_APPS},
    {"AS",                   TK_AS},
    {"ASC",                  TK_ASC},
    {"AT_ONCE",              TK_AT_ONCE},
    {"BALANCE",              TK_BALANCE},
    {"BETWEEN",              TK_BETWEEN},
    {"BIGINT",               TK_BIGINT},
X
Xiaoyu Wang 已提交
47
    {"BINARY",               TK_BINARY},
X
Xiaoyu Wang 已提交
48 49 50 51 52 53 54 55 56
    {"BNODE",                TK_BNODE},
    {"BNODES",               TK_BNODES},
    {"BOOL",                 TK_BOOL},
    {"BUFFER",               TK_BUFFER},
    {"BUFSIZE",              TK_BUFSIZE},
    {"BY",                   TK_BY},
    {"CACHE",                TK_CACHE},
    {"CACHEMODEL",           TK_CACHEMODEL},
    {"CACHESIZE",            TK_CACHESIZE},
X
Xiaoyu Wang 已提交
57
    {"CASE",                 TK_CASE},
X
Xiaoyu Wang 已提交
58 59 60 61 62 63
    {"CAST",                 TK_CAST},
    {"CLIENT_VERSION",       TK_CLIENT_VERSION},
    {"CLUSTER",              TK_CLUSTER},
    {"COLUMN",               TK_COLUMN},
    {"COMMENT",              TK_COMMENT},
    {"COMP",                 TK_COMP},
X
Xiaoyu Wang 已提交
64
    {"COMPACT",              TK_COMPACT},
X
Xiaoyu Wang 已提交
65 66
    {"CONNECTION",           TK_CONNECTION},
    {"CONNECTIONS",          TK_CONNECTIONS},
X
Xiaoyu Wang 已提交
67
    {"CONNS",                TK_CONNS},
X
Xiaoyu Wang 已提交
68 69 70 71 72 73 74 75 76 77
    {"CONSUMER",             TK_CONSUMER},
    {"CONSUMERS",            TK_CONSUMERS},
    {"CONTAINS",             TK_CONTAINS},
    {"COUNT",                TK_COUNT},
    {"CREATE",               TK_CREATE},
    {"CURRENT_USER",         TK_CURRENT_USER},
    {"DATABASE",             TK_DATABASE},
    {"DATABASES",            TK_DATABASES},
    {"DBS",                  TK_DBS},
    {"DELETE",               TK_DELETE},
78
    {"DELETE_MARK",          TK_DELETE_MARK},
X
Xiaoyu Wang 已提交
79 80 81 82 83 84 85 86 87
    {"DESC",                 TK_DESC},
    {"DESCRIBE",             TK_DESCRIBE},
    {"DISTINCT",             TK_DISTINCT},
    {"DISTRIBUTED",          TK_DISTRIBUTED},
    {"DNODE",                TK_DNODE},
    {"DNODES",               TK_DNODES},
    {"DOUBLE",               TK_DOUBLE},
    {"DROP",                 TK_DROP},
    {"DURATION",             TK_DURATION},
X
Xiaoyu Wang 已提交
88
    {"ELSE",                 TK_ELSE},
X
Xiaoyu Wang 已提交
89
    {"ENABLE",               TK_ENABLE},
X
Xiaoyu Wang 已提交
90
    {"END",                  TK_END},
X
Xiaoyu Wang 已提交
91 92 93
    {"EXISTS",               TK_EXISTS},
    {"EXPIRED",              TK_EXPIRED},
    {"EXPLAIN",              TK_EXPLAIN},
X
Xiaoyu Wang 已提交
94
    {"EVENT_WINDOW",         TK_EVENT_WINDOW},
X
Xiaoyu Wang 已提交
95 96 97
    {"EVERY",                TK_EVERY},
    {"FILE",                 TK_FILE},
    {"FILL",                 TK_FILL},
98
    {"FILL_HISTORY",         TK_FILL_HISTORY},
X
Xiaoyu Wang 已提交
99 100 101 102
    {"FIRST",                TK_FIRST},
    {"FLOAT",                TK_FLOAT},
    {"FLUSH",                TK_FLUSH},
    {"FROM",                 TK_FROM},
103
    {"FORCE",                TK_FORCE},
X
Xiaoyu Wang 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116 117
    {"FUNCTION",             TK_FUNCTION},
    {"FUNCTIONS",            TK_FUNCTIONS},
    {"GRANT",                TK_GRANT},
    {"GRANTS",               TK_GRANTS},
    {"GROUP",                TK_GROUP},
    {"HAVING",               TK_HAVING},
    {"IF",                   TK_IF},
    {"IGNORE",               TK_IGNORE},
    {"IMPORT",               TK_IMPORT},
    {"IN",                   TK_IN},
    {"INDEX",                TK_INDEX},
    {"INDEXES",              TK_INDEXES},
    {"INNER",                TK_INNER},
    {"INSERT",               TK_INSERT},
X
Xiaoyu Wang 已提交
118
    {"INT",                  TK_INT},
X
Xiaoyu Wang 已提交
119 120 121 122 123 124 125 126 127 128
    {"INTEGER",              TK_INTEGER},
    {"INTERVAL",             TK_INTERVAL},
    {"INTO",                 TK_INTO},
    {"IS",                   TK_IS},
    {"JOIN",                 TK_JOIN},
    {"JSON",                 TK_JSON},
    {"KEEP",                 TK_KEEP},
    {"KILL",                 TK_KILL},
    {"LAST",                 TK_LAST},
    {"LAST_ROW",             TK_LAST_ROW},
X
Xiaoyu Wang 已提交
129
    {"LICENCES",             TK_LICENCES},
X
Xiaoyu Wang 已提交
130 131 132 133 134 135 136
    {"LIKE",                 TK_LIKE},
    {"LIMIT",                TK_LIMIT},
    {"LINEAR",               TK_LINEAR},
    {"LOCAL",                TK_LOCAL},
    {"MATCH",                TK_MATCH},
    {"MAXROWS",              TK_MAXROWS},
    {"MAX_DELAY",            TK_MAX_DELAY},
137
    {"MAX_SPEED",            TK_MAX_SPEED},
X
Xiaoyu Wang 已提交
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
    {"MERGE",                TK_MERGE},
    {"META",                 TK_META},
    {"MINROWS",              TK_MINROWS},
    {"MINUS",                TK_MINUS},
    {"MNODE",                TK_MNODE},
    {"MNODES",               TK_MNODES},
    {"MODIFY",               TK_MODIFY},
    {"MODULES",              TK_MODULES},
    {"NCHAR",                TK_NCHAR},
    {"NEXT",                 TK_NEXT},
    {"NMATCH",               TK_NMATCH},
    {"NONE",                 TK_NONE},
    {"NOT",                  TK_NOT},
    {"NOW",                  TK_NOW},
    {"NULL",                 TK_NULL},
D
dapan1121 已提交
153
    {"NULL_F",               TK_NULL_F},
X
Xiaoyu Wang 已提交
154 155 156 157 158 159 160 161
    {"NULLS",                TK_NULLS},
    {"OFFSET",               TK_OFFSET},
    {"ON",                   TK_ON},
    {"OR",                   TK_OR},
    {"ORDER",                TK_ORDER},
    {"OUTPUTTYPE",           TK_OUTPUTTYPE},
    {"PAGES",                TK_PAGES},
    {"PAGESIZE",             TK_PAGESIZE},
X
Xiaoyu Wang 已提交
162 163
    {"PARTITION",            TK_PARTITION},
    {"PASS",                 TK_PASS},
X
Xiaoyu Wang 已提交
164 165 166 167
    {"PORT",                 TK_PORT},
    {"PPS",                  TK_PPS},
    {"PRECISION",            TK_PRECISION},
    {"PREV",                 TK_PREV},
168
    {"PRIVILEGES",           TK_PRIVILEGES},
X
Xiaoyu Wang 已提交
169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202
    {"QNODE",                TK_QNODE},
    {"QNODES",               TK_QNODES},
    {"QTIME",                TK_QTIME},
    {"QUERIES",              TK_QUERIES},
    {"QUERY",                TK_QUERY},
    {"RANGE",                TK_RANGE},
    {"RATIO",                TK_RATIO},
    {"READ",                 TK_READ},
    {"REDISTRIBUTE",         TK_REDISTRIBUTE},
    {"RENAME",               TK_RENAME},
    {"REPLICA",              TK_REPLICA},
    {"RESET",                TK_RESET},
    {"RETENTIONS",           TK_RETENTIONS},
    {"REVOKE",               TK_REVOKE},
    {"ROLLUP",               TK_ROLLUP},
    {"SCHEMALESS",           TK_SCHEMALESS},
    {"SCORES",               TK_SCORES},
    {"SELECT",               TK_SELECT},
    {"SERVER_STATUS",        TK_SERVER_STATUS},
    {"SERVER_VERSION",       TK_SERVER_VERSION},
    {"SESSION",              TK_SESSION},
    {"SET",                  TK_SET},
    {"SHOW",                 TK_SHOW},
    {"SINGLE_STABLE",        TK_SINGLE_STABLE},
    {"SLIDING",              TK_SLIDING},
    {"SLIMIT",               TK_SLIMIT},
    {"SMA",                  TK_SMA},
    {"SMALLINT",             TK_SMALLINT},
    {"SNODE",                TK_SNODE},
    {"SNODES",               TK_SNODES},
    {"SOFFSET",              TK_SOFFSET},
    {"SPLIT",                TK_SPLIT},
    {"STABLE",               TK_STABLE},
    {"STABLES",              TK_STABLES},
X
Xiaoyu Wang 已提交
203
    {"START",                TK_START},
X
Xiaoyu Wang 已提交
204 205 206 207 208 209
    {"STATE",                TK_STATE},
    {"STATE_WINDOW",         TK_STATE_WINDOW},
    {"STORAGE",              TK_STORAGE},
    {"STREAM",               TK_STREAM},
    {"STREAMS",              TK_STREAMS},
    {"STRICT",               TK_STRICT},
X
Xiaoyu Wang 已提交
210
    {"STT_TRIGGER",          TK_STT_TRIGGER},
211
    {"SUBSCRIBE",            TK_SUBSCRIBE},
X
Xiaoyu Wang 已提交
212
    {"SUBSCRIPTIONS",        TK_SUBSCRIPTIONS},
213
    {"SUBTABLE",             TK_SUBTABLE},
X
Xiaoyu Wang 已提交
214 215 216
    {"SYSINFO",              TK_SYSINFO},
    {"TABLE",                TK_TABLE},
    {"TABLES",               TK_TABLES},
217 218
    {"TABLE_PREFIX",         TK_TABLE_PREFIX},
    {"TABLE_SUFFIX",         TK_TABLE_SUFFIX},
X
Xiaoyu Wang 已提交
219 220 221
    {"TAG",                  TK_TAG},
    {"TAGS",                 TK_TAGS},
    {"TBNAME",               TK_TBNAME},
X
Xiaoyu Wang 已提交
222
    {"THEN",                 TK_THEN},
X
Xiaoyu Wang 已提交
223 224 225 226 227 228 229 230 231 232 233
    {"TIMESTAMP",            TK_TIMESTAMP},
    {"TIMEZONE",             TK_TIMEZONE},
    {"TINYINT",              TK_TINYINT},
    {"TO",                   TK_TO},
    {"TODAY",                TK_TODAY},
    {"TOPIC",                TK_TOPIC},
    {"TOPICS",               TK_TOPICS},
    {"TRANSACTION",          TK_TRANSACTION},
    {"TRANSACTIONS",         TK_TRANSACTIONS},
    {"TRIGGER",              TK_TRIGGER},
    {"TRIM",                 TK_TRIM},
234
    {"TSDB_PAGESIZE",        TK_TSDB_PAGESIZE},
X
Xiaoyu Wang 已提交
235 236 237 238
    {"TSERIES",              TK_TSERIES},
    {"TTL",                  TK_TTL},
    {"UNION",                TK_UNION},
    {"UNSIGNED",             TK_UNSIGNED},
239
    {"UPDATE",                  TK_UPDATE},
X
Xiaoyu Wang 已提交
240 241 242 243 244
    {"USE",                  TK_USE},
    {"USER",                 TK_USER},
    {"USERS",                TK_USERS},
    {"USING",                TK_USING},
    {"VALUE",                TK_VALUE},
D
dapan1121 已提交
245
    {"VALUE_F",              TK_VALUE_F},
X
Xiaoyu Wang 已提交
246 247 248 249 250 251
    {"VALUES",               TK_VALUES},
    {"VARCHAR",              TK_VARCHAR},
    {"VARIABLES",            TK_VARIABLES},
    {"VERBOSE",              TK_VERBOSE},
    {"VGROUP",               TK_VGROUP},
    {"VGROUPS",              TK_VGROUPS},
252
    {"VNODES",               TK_VNODES},
X
Xiaoyu Wang 已提交
253 254
    {"WAL_FSYNC_PERIOD",     TK_WAL_FSYNC_PERIOD},
    {"WAL_LEVEL",            TK_WAL_LEVEL},
X
Xiaoyu Wang 已提交
255 256 257 258
    {"WAL_RETENTION_PERIOD", TK_WAL_RETENTION_PERIOD},
    {"WAL_RETENTION_SIZE",   TK_WAL_RETENTION_SIZE},
    {"WAL_ROLL_PERIOD",      TK_WAL_ROLL_PERIOD},
    {"WAL_SEGMENT_SIZE",     TK_WAL_SEGMENT_SIZE},
X
Xiaoyu Wang 已提交
259
    {"WATERMARK",            TK_WATERMARK},
X
Xiaoyu Wang 已提交
260
    {"WHEN",                 TK_WHEN},
X
Xiaoyu Wang 已提交
261 262 263 264 265
    {"WHERE",                TK_WHERE},
    {"WINDOW_CLOSE",         TK_WINDOW_CLOSE},
    {"WITH",                 TK_WITH},
    {"WRITE",                TK_WRITE},
    {"_C0",                  TK_ROWTS},
266
    {"_IROWTS",              TK_IROWTS},
267
    {"_ISFILLED",            TK_ISFILLED},
X
Xiaoyu Wang 已提交
268 269 270 271
    {"_QDURATION",           TK_QDURATION},
    {"_QEND",                TK_QEND},
    {"_QSTART",              TK_QSTART},
    {"_ROWTS",               TK_ROWTS},
272
    {"_TAGS",                TK_QTAGS},
X
Xiaoyu Wang 已提交
273 274 275
    {"_WDURATION",           TK_WDURATION},
    {"_WEND",                TK_WEND},
    {"_WSTART",              TK_WSTART},
276
    {"ALIVE",                TK_ALIVE},
H
hzcheng 已提交
277
};
X
Xiaoyu Wang 已提交
278
// clang-format on
H
hzcheng 已提交
279 280 281 282 283 284 285 286 287 288 289 290 291

/*
 * isIdChar[b] is 1 when byte b may appear in an unquoted identifier:
 * ASCII digits, ASCII letters and '_'. Everything else is 0.
 *
 * The array is sized for every possible byte value so that indexing it with a
 * (uint8_t) cast can never read out of bounds; entries 0x80..0xFF are not
 * listed and are therefore zero-initialised.
 */
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x: '0'-'9' */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x: 'A'-'O' */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x: 'P'-'Z', '_' */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x: 'a'-'o' */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x: 'p'-'z' */
};

H
Haojun Liao 已提交
292
// Keyword lookup table: created and filled by doInitKeywordsTable (keyword text -> pointer to its
// keywordTable row), read by tKeywordCode, and released by taosCleanupKeywordsTable.
static void* keywordHashTable = NULL;
H
hzcheng 已提交
293

S
TD-1057  
Shengliang Guan 已提交
294
static void doInitKeywordsTable(void) {
295
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
296

H
Haojun Liao 已提交
297
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
298
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
299
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
300
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
301
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
302
  }
303 304
}

wafwerar's avatar
wafwerar 已提交
305
// Once-control handed to taosThreadOnce in tKeywordCode to trigger doInitKeywordsTable.
// NOTE(review): presumably pthread_once semantics (initialiser runs a single time) -- confirm in the os layer.
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
306

307
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
308
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
309

H
hjxilinx 已提交
310
  char key[512] = {0};
X
Xiaoyu Wang 已提交
311
  if (n > tListLen(key)) {  // too long token, can not be any other token type
312
    return TK_NK_ID;
313
  }
X
Xiaoyu Wang 已提交
314

H
hzcheng 已提交
315 316
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
317
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
318 319 320 321 322
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
323
  if (keywordHashTable == NULL) {
324
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
325
  }
H
Haojun Liao 已提交
326

H
Haojun Liao 已提交
327
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
328
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
329 330
}

H
huili 已提交
331
/*
332 333 334
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
335
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
336
  uint32_t i;
H
hzcheng 已提交
337 338 339 340 341 342 343 344
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
345
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
346 347 348
      return i;
    }
    case ':': {
349
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
350 351 352 353 354 355
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
356
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
357
        return i;
358 359 360
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
361
      }
X
Xiaoyu Wang 已提交
362
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
363 364 365
      return 1;
    }
    case '(': {
366
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
367 368 369
      return 1;
    }
    case ')': {
370
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
371 372 373
      return 1;
    }
    case ';': {
374
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
375 376 377
      return 1;
    }
    case '+': {
378
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
379 380 381
      return 1;
    }
    case '*': {
382
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
383 384 385 386
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
387
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
388 389 390 391 392
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
393
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
394 395 396
      return i;
    }
    case '%': {
397
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
398 399 400
      return 1;
    }
    case '=': {
401
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
402 403 404 405
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
406
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
407 408
        return 2;
      } else if (z[1] == '>') {
409
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
410 411
        return 2;
      } else if (z[1] == '<') {
412
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
413 414
        return 2;
      } else {
415
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
416 417 418 419 420
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
421
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
422 423
        return 2;
      } else if (z[1] == '>') {
424
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
425 426
        return 2;
      } else {
427
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
428 429 430 431 432
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
433
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
434 435
        return 2;
      } else {
436
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
437 438 439 440 441
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
442
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
443 444
        return 1;
      } else {
445
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
446 447 448 449
        return 2;
      }
    }
    case ',': {
450
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
451 452 453
      return 1;
    }
    case '&': {
454
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
455 456 457
      return 1;
    }
    case '~': {
458
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
459 460
      return 1;
    }
S
slguan 已提交
461
    case '?': {
462
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
463 464
      return 1;
    }
465
    case '`':
H
hzcheng 已提交
466 467
    case '\'':
    case '"': {
S
slguan 已提交
468 469
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
470
      for (i = 1; z[i]; i++) {
471
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
472 473 474
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
475

476
        if (z[i] == delim) {
H
hzcheng 已提交
477 478 479
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
480
            strEnd = true;
H
hzcheng 已提交
481 482 483 484
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
485

H
hzcheng 已提交
486
      if (z[i]) i++;
H
huili 已提交
487

S
slguan 已提交
488
      if (strEnd) {
X
Xiaoyu Wang 已提交
489
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
490 491
        return i;
      }
H
huili 已提交
492

S
slguan 已提交
493
      break;
H
hzcheng 已提交
494 495
    }
    case '.': {
S
slguan 已提交
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

513
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
514 515
        return i;
      } else {
516
        *tokenId = TK_NK_DOT;
S
slguan 已提交
517 518 519 520 521 522 523
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
524
      if (next == 'b') {  // bin number
525
        *tokenId = TK_NK_BIN;
S
slguan 已提交
526 527 528 529 530 531 532 533
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
534
      } else if (next == 'x') {  // hex number
535
        *tokenId = TK_NK_HEX;
S
slguan 已提交
536 537 538 539 540 541 542 543 544
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
545 546 547 548 549 550 551 552 553 554
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
555
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
556 557 558
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
559
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
560 561 562
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
563
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
564
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
565 566 567 568 569 570 571 572 573 574
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
575
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
576 577 578 579
        seg++;
      }

      if (seg == 4) {  // ip address
580
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
581
        return i;
X
Xiaoyu Wang 已提交
582 583
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
584 585 586 587 588 589 590 591
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
592
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
593 594 595
      }
      return i;
    }
X
Xiaoyu Wang 已提交
596 597 598 599 600 601
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
602 603 604 605
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
606
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
607 608 609
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
610
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
611 612
        return i;
      }
X
Xiaoyu Wang 已提交
613 614
      *tokenId = tKeywordCode(z, i);
      return i;
H
hzcheng 已提交
615 616
    }
    default: {
X
Xiaoyu Wang 已提交
617
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
618 619
        break;
      }
X
Xiaoyu Wang 已提交
620
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
621
      }
622
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
623 624 625 626
      return i;
    }
  }

627
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
628 629 630
  return 0;
}

X
Xiaoyu Wang 已提交
631
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr, bool* pIgnoreComma) {
H
Haojun Liao 已提交
632
  SToken t0 = {0};
S
slguan 已提交
633

H
hzcheng 已提交
634 635
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
636 637
    t0.n = 0;
    return t0;
H
hzcheng 已提交
638 639
  }

640
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
641 642 643
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
644
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
645
    char    t = str[*i];
H
Haojun Liao 已提交
646 647 648 649
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
650
      }
X
Xiaoyu Wang 已提交
651

X
Xiaoyu Wang 已提交
652 653 654 655
      if (NULL != pIgnoreComma && t == ',') {
        *pIgnoreComma = true;
      }

H
Haojun Liao 已提交
656
      t = str[++(*i)];
S
slguan 已提交
657
    }
H
hzcheng 已提交
658

659
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
660
    break;
S
slguan 已提交
661

H
Haojun Liao 已提交
662 663
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
664
    bool ignore = false;
S
slguan 已提交
665 666
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
667
        ignore = true;
S
slguan 已提交
668 669 670 671
        break;
      }
    }

H
Haojun Liao 已提交
672
    if (!ignore) {
S
slguan 已提交
673 674
      break;
    }
H
Haojun Liao 已提交
675
#endif
H
hzcheng 已提交
676 677
  }

678
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
679
    t0.n = 0;
D
dapan1121 已提交
680
    t0.type = 0;
S
slguan 已提交
681 682 683 684 685 686 687 688
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
689
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
690 691

    // only id and string are valid
692
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
693
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
694 695 696 697 698 699 700 701 702
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
703
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
704
      len = tGetToken(&str[*i + t0.n], &type);
705
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
706 707 708
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
709 710 711
    }
  }

X
Xiaoyu Wang 已提交
712
  t0.z = (char*)str + (*i);
S
slguan 已提交
713 714 715
  *i += t0.n;

  return t0;
H
hzcheng 已提交
716 717
}

X
Xiaoyu Wang 已提交
718
/*
 * Report whether z[0..len) is treated as a keyword.
 * Any tKeywordCode result other than TK_NK_ID counts, which includes
 * TK_NK_ILLEGAL (returned while the keyword table is unavailable).
 */
bool taosIsKeyWordToken(const char* z, int32_t len) { return tKeywordCode(z, len) != TK_NK_ID; }
H
Haojun Liao 已提交
719 720

/*
 * Release the keyword hash table, if one exists.
 * The global is cleared with a compare-and-exchange, and the table is freed
 * only when that exchange returns the snapshot taken here.
 */
void taosCleanupKeywordsTable(void) {
  void* m = keywordHashTable;
  if (m == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, m, NULL) == m) {
    taosHashCleanup(m);
  }
}