parTokenizer.c 20.6 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One row of the SQL keyword table.
 * Field order matters: keywordTable initialises name and type and leaves
 * len zero until doInitKeywordsTable() fills it in.
 */
typedef struct SKeyword {
  const char* name;  // keyword text exactly as listed in keywordTable
  uint16_t    type;  // token id (TK_*) reported when the keyword is matched
  uint8_t     len;   // cached strlen(name), computed during table initialisation
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43
    {"ACCOUNT",       TK_ACCOUNT},
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
    {"ALL",           TK_ALL},
    {"ALTER",         TK_ALTER},
    {"ANALYZE",       TK_ANALYZE},
    {"AND",           TK_AND},
    {"APPS",          TK_APPS},
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"AT_ONCE",       TK_AT_ONCE},
X
Xiaoyu Wang 已提交
44
    {"BALANCE",       TK_BALANCE},
X
Xiaoyu Wang 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
    {"BOOL",          TK_BOOL},
    {"BUFFER",        TK_BUFFER},
    {"BUFSIZE",       TK_BUFSIZE},
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
    {"CAST",          TK_CAST},
    {"CLUSTER",       TK_CLUSTER},
    {"COLUMN",        TK_COLUMN},
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
65
    {"CONSUMER",      TK_CONSUMER},
X
Xiaoyu Wang 已提交
66 67
    {"COUNT",         TK_COUNT},
    {"CREATE",        TK_CREATE},
dengyihao's avatar
dengyihao 已提交
68
    {"CONTAINS",      TK_CONTAINS}, 
X
Xiaoyu Wang 已提交
69 70
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
X
Xiaoyu Wang 已提交
71
    // {"DAYS",          TK_DAYS},
X
Xiaoyu Wang 已提交
72
    {"DBS",           TK_DBS},
X
Xiaoyu Wang 已提交
73
    {"DELETE",        TK_DELETE},
X
Xiaoyu Wang 已提交
74 75 76 77 78 79 80
    {"DESC",          TK_DESC},
    {"DESCRIBE",      TK_DESCRIBE},
    {"DISTINCT",      TK_DISTINCT},
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
    {"DOUBLE",        TK_DOUBLE},
    {"DROP",          TK_DROP},
X
Xiaoyu Wang 已提交
81
    {"DURATION",      TK_DURATION},
X
Xiaoyu Wang 已提交
82 83 84 85 86 87 88 89 90 91
    {"EXISTS",        TK_EXISTS},
    {"EXPLAIN",       TK_EXPLAIN},
    {"FILE_FACTOR",   TK_FILE_FACTOR},
    {"FILL",          TK_FILL},
    {"FIRST",         TK_FIRST},
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
    {"FUNCTION",      TK_FUNCTION},
    {"FUNCTIONS",     TK_FUNCTIONS},
92
    {"GRANT",         TK_GRANT},
X
Xiaoyu Wang 已提交
93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
    {"GRANTS",        TK_GRANTS},
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
    {"INDEX",         TK_INDEX},
    {"INDEXES",       TK_INDEXES},
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
    {"KILL",          TK_KILL},
    {"LAST",          TK_LAST},
    {"LAST_ROW",      TK_LAST_ROW},
    {"LICENCE",       TK_LICENCE},
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
    {"LOCAL",         TK_LOCAL},
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
121
    {"MAX_DELAY",     TK_MAX_DELAY},
X
Xiaoyu Wang 已提交
122
    {"MERGE",         TK_MERGE},
X
Xiaoyu Wang 已提交
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
    {"MNODE",         TK_MNODE},
    {"MNODES",        TK_MNODES},
    {"MODIFY",        TK_MODIFY},
    {"MODULES",       TK_MODULES},
    {"NCHAR",         TK_NCHAR},
    {"NEXT",          TK_NEXT},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
    {"NULLS",         TK_NULLS},
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
    {"PARTITION",     TK_PARTITION},
    {"PASS",          TK_PASS},
    {"PAGES",         TK_PAGES},
    {"PAGESIZE",      TK_PAGESIZE},
    {"PORT",          TK_PORT},
    {"PPS",           TK_PPS},
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
    {"QNODE",         TK_QNODE},
    {"QNODES",        TK_QNODES},
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
    {"QUERY",         TK_QUERY},
    {"RATIO",         TK_RATIO},
157
    {"READ",          TK_READ},
X
Xiaoyu Wang 已提交
158
    {"REDISTRIBUTE",  TK_REDISTRIBUTE},
159
    {"RENAME",        TK_RENAME},
X
Xiaoyu Wang 已提交
160 161 162
    {"REPLICA",       TK_REPLICA},
    {"RESET",         TK_RESET},
    {"RETENTIONS",    TK_RETENTIONS},
163
    {"REVOKE",        TK_REVOKE},
X
Xiaoyu Wang 已提交
164
    {"ROLLUP",        TK_ROLLUP},
X
Xiaoyu Wang 已提交
165
    {"SCHEMALESS",    TK_SCHEMALESS},
X
Xiaoyu Wang 已提交
166 167 168 169 170
    {"SCORES",        TK_SCORES},
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
    {"SET",           TK_SET},
    {"SHOW",          TK_SHOW},
171
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
172 173 174 175 176 177 178
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
    {"SOFFSET",       TK_SOFFSET},
179
    {"SPLIT",         TK_SPLIT},
X
Xiaoyu Wang 已提交
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
    {"STABLE",        TK_STABLE},
    {"STABLES",       TK_STABLES},
    {"STATE",         TK_STATE},
    {"STATE_WINDOW",  TK_STATE_WINDOW},
    {"STORAGE",       TK_STORAGE},
    {"STREAM",        TK_STREAM},
    {"STREAMS",       TK_STREAMS},
    {"STRICT",        TK_STRICT},
    {"SYNCDB",        TK_SYNCDB},
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
    {"TAG",           TK_TAG},
    {"TAGS",          TK_TAGS},
    {"TBNAME",        TK_TBNAME},
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TIMEZONE",      TK_TIMEZONE},
    {"TINYINT",       TK_TINYINT},
197
    {"TO",            TK_TO},
X
Xiaoyu Wang 已提交
198 199 200
    {"TODAY",         TK_TODAY},
    {"TOPIC",         TK_TOPIC},
    {"TOPICS",        TK_TOPICS},
201 202
    {"TRANSACTION",   TK_TRANSACTION},
    {"TRANSACTIONS",  TK_TRANSACTIONS},
X
Xiaoyu Wang 已提交
203 204 205 206 207 208 209 210 211 212 213 214 215 216
    {"TRIGGER",       TK_TRIGGER},
    {"TSERIES",       TK_TSERIES},
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
    {"USING",         TK_USING},
    {"VALUE",         TK_VALUE},
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
    {"VARIABLES",     TK_VARIABLES},
    {"VERBOSE",       TK_VERBOSE},
X
Xiaoyu Wang 已提交
217
    {"VGROUP",        TK_VGROUP},
X
Xiaoyu Wang 已提交
218 219 220 221 222 223
    {"VGROUPS",       TK_VGROUPS},
    {"VNODES",        TK_VNODES},
    {"WAL",           TK_WAL},
    {"WATERMARK",     TK_WATERMARK},
    {"WHERE",         TK_WHERE},
    {"WINDOW_CLOSE",  TK_WINDOW_CLOSE},
224
    {"WRITE",         TK_WRITE},
X
Xiaoyu Wang 已提交
225
    {"_C0",           TK_ROWTS},
X
Xiaoyu Wang 已提交
226 227 228 229 230 231
    {"_QENDTS",       TK_QENDTS},
    {"_QSTARTTS",     TK_QSTARTTS},
    {"_ROWTS",        TK_ROWTS},
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
    {"_WSTARTTS",     TK_WSTARTTS},
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
295
    // {"PARTITIONS",   TK_PARTITIONS},
296
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
297
};
X
Xiaoyu Wang 已提交
298
// clang-format on
H
hzcheng 已提交
299 300 301 302 303 304 305 306 307 308 309 310 311

/*
 * Lookup table indexed by a 7-bit ASCII code: 1 when the character may appear
 * in an identifier ('0'-'9', 'A'-'Z', 'a'-'z' and '_'), 0 otherwise.
 * The table has exactly 128 entries, so callers must reject bytes with the
 * high bit set (>= 0x80) before indexing it.
 */
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
312
// Hash table mapping keyword text to a pointer into keywordTable; created by
// doInitKeywordsTable() and reset to NULL by taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
313

S
TD-1057  
Shengliang Guan 已提交
314
static void doInitKeywordsTable(void) {
315
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
316

H
Haojun Liao 已提交
317
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
318
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
319
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
320
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
321
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
322
  }
323 324
}

wafwerar's avatar
wafwerar 已提交
325
// Once-control passed to taosThreadOnce() in tKeywordCode() to run doInitKeywordsTable().
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
326

327
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
328
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
329

H
hjxilinx 已提交
330
  char key[512] = {0};
X
Xiaoyu Wang 已提交
331
  if (n > tListLen(key)) {  // too long token, can not be any other token type
332
    return TK_NK_ID;
333
  }
X
Xiaoyu Wang 已提交
334

H
hzcheng 已提交
335 336
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
337
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
338 339 340 341 342
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
343
  if (keywordHashTable == NULL) {
344
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
345
  }
H
Haojun Liao 已提交
346

H
Haojun Liao 已提交
347
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
348
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
349 350
}

H
huili 已提交
351
/*
352 353 354
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
355
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
356
  uint32_t i;
H
hzcheng 已提交
357 358 359 360 361 362 363 364
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
365
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
366 367 368
      return i;
    }
    case ':': {
369
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
370 371 372 373 374 375
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
376
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
377
        return i;
378 379 380
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
381
      }
X
Xiaoyu Wang 已提交
382
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
383 384 385
      return 1;
    }
    case '(': {
386
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
387 388 389
      return 1;
    }
    case ')': {
390
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
391 392 393
      return 1;
    }
    case ';': {
394
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
395 396 397
      return 1;
    }
    case '+': {
398
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
399 400 401
      return 1;
    }
    case '*': {
402
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
403 404 405 406
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
407
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
408 409 410 411 412
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
413
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
414 415 416
      return i;
    }
    case '%': {
417
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
418 419 420
      return 1;
    }
    case '=': {
421
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
422 423 424 425
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
426
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
427 428
        return 2;
      } else if (z[1] == '>') {
429
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
430 431
        return 2;
      } else if (z[1] == '<') {
432
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
433 434
        return 2;
      } else {
435
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
436 437 438 439 440
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
441
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
442 443
        return 2;
      } else if (z[1] == '>') {
444
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
445 446
        return 2;
      } else {
447
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
448 449 450 451 452
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
453
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
454 455
        return 2;
      } else {
456
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
457 458 459 460 461
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
462
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
463 464
        return 1;
      } else {
465
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
466 467 468 469
        return 2;
      }
    }
    case ',': {
470
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
471 472 473
      return 1;
    }
    case '&': {
474
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
475 476 477
      return 1;
    }
    case '~': {
478
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
479 480
      return 1;
    }
S
slguan 已提交
481
    case '?': {
482
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
483 484
      return 1;
    }
485
    case '`':
H
hzcheng 已提交
486 487
    case '\'':
    case '"': {
S
slguan 已提交
488 489
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
490
      for (i = 1; z[i]; i++) {
491
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
492 493 494
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
495

496
        if (z[i] == delim) {
H
hzcheng 已提交
497 498 499
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
500
            strEnd = true;
H
hzcheng 已提交
501 502 503 504
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
505

H
hzcheng 已提交
506
      if (z[i]) i++;
H
huili 已提交
507

S
slguan 已提交
508
      if (strEnd) {
X
Xiaoyu Wang 已提交
509
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
510 511
        return i;
      }
H
huili 已提交
512

S
slguan 已提交
513
      break;
H
hzcheng 已提交
514 515
    }
    case '.': {
S
slguan 已提交
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

533
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
534 535
        return i;
      } else {
536
        *tokenId = TK_NK_DOT;
S
slguan 已提交
537 538 539 540 541 542 543
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
544
      if (next == 'b') {  // bin number
545
        *tokenId = TK_NK_BIN;
S
slguan 已提交
546 547 548 549 550 551 552 553
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
554
      } else if (next == 'x') {  // hex number
555
        *tokenId = TK_NK_HEX;
S
slguan 已提交
556 557 558 559 560 561 562 563 564
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
565 566 567 568 569 570 571 572 573 574
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
575
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
576 577 578
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
579
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
580 581 582
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
583
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
584
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
585 586 587 588 589 590 591 592 593 594
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
595
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
596 597 598 599
        seg++;
      }

      if (seg == 4) {  // ip address
600
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
601
        return i;
X
Xiaoyu Wang 已提交
602 603
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
604 605 606 607 608 609 610 611
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
612
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
613 614 615
      }
      return i;
    }
X
Xiaoyu Wang 已提交
616 617 618 619 620 621
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
622 623 624 625
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
626
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
627 628 629
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
630
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
631 632 633 634
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
635
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
636 637
        break;
      }
X
Xiaoyu Wang 已提交
638
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
639
      }
640
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
641 642 643 644
      return i;
    }
  }

645
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
646 647 648
  return 0;
}

X
Xiaoyu Wang 已提交
649 650 651
/*
 * Replace the text of `token` inside the string *str with `newToken`.
 *
 * A new buffer is allocated, filled with <text before token><newToken><text
 * after token>, and stored in *str; the old buffer is freed.
 *
 * str      - in/out: points to the buffer that holds the token; updated on success
 * token    - token to replace; token->z must point into *str
 * newToken - NUL-terminated replacement text
 *
 * Returns a token describing the replacement text inside the new buffer
 * (z and n set, remaining fields zero). If the allocation fails, *str is left
 * untouched and a zeroed token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  char*       src = *str;
  const char* tail = token->z + token->n;     // text following the replaced token
  size_t      headLen = (size_t)(token->z - src);
  size_t      newLen = strlen(newToken);
  size_t      tailLen = strlen(tail);
  SToken      ntoken = {0};

  char* dst = taosMemoryCalloc(1, headLen + newLen + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  // The buffer is zero-filled, so the result is NUL-terminated after the three copies.
  memcpy(dst, src, headLen);
  memcpy(dst + headLen, newToken, newLen);
  memcpy(dst + headLen + newLen, tail, tailLen);

  ntoken.n = (uint32_t)newLen;
  ntoken.z = dst + headLen;

  *str = dst;
  taosMemoryFreeClear(src);  // token->z pointed into this buffer and is dangling from here on

  return ntoken;
}

670
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
671
  SToken t0 = {0};
S
slguan 已提交
672

H
hzcheng 已提交
673 674
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
675 676
    t0.n = 0;
    return t0;
H
hzcheng 已提交
677 678
  }

679
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
680 681 682
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
683
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
684
    char    t = str[*i];
H
Haojun Liao 已提交
685 686 687 688
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
689
      }
X
Xiaoyu Wang 已提交
690

H
Haojun Liao 已提交
691
      t = str[++(*i)];
S
slguan 已提交
692
    }
H
hzcheng 已提交
693

694
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
695
    break;
S
slguan 已提交
696

H
Haojun Liao 已提交
697 698
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
699
    bool ignore = false;
S
slguan 已提交
700 701
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
702
        ignore = true;
S
slguan 已提交
703 704 705 706
        break;
      }
    }

H
Haojun Liao 已提交
707
    if (!ignore) {
S
slguan 已提交
708 709
      break;
    }
H
Haojun Liao 已提交
710
#endif
H
hzcheng 已提交
711 712
  }

713
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
714
    t0.n = 0;
D
dapan1121 已提交
715
    t0.type = 0;
S
slguan 已提交
716 717 718 719 720 721 722 723
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
724
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
725 726

    // only id and string are valid
727
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
728
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
729 730 731 732 733 734 735 736 737
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
738
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
739
      len = tGetToken(&str[*i + t0.n], &type);
740
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
741 742 743
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
744 745 746
    }
  }

X
Xiaoyu Wang 已提交
747
  t0.z = (char*)str + (*i);
S
slguan 已提交
748 749 750
  *i += t0.n;

  return t0;
H
hzcheng 已提交
751 752
}

X
Xiaoyu Wang 已提交
753
/*
 * Report whether the len-byte word at z is anything other than a plain
 * identifier, i.e. whether tKeywordCode() returns something different from
 * TK_NK_ID (this includes TK_NK_ILLEGAL when the keyword table is unavailable).
 */
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
754 755

/*
 * Release the keyword hash table, if there is one.
 * The global pointer is swapped to 0 with a compare-and-exchange and the table
 * is destroyed only when the exchange reports the value this call observed
 * (presumably so that concurrent callers cannot free it twice - confirm
 * against atomic_val_compare_exchange_ptr).
 */
void taosCleanupKeywordsTable() {
  void* pTable = keywordHashTable;
  if (pTable == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, pTable, 0) != pTable) {
    return;
  }

  taosHashCleanup(pTable);
}
761

H
Haojun Liao 已提交
762
/*
 * Copy the text of *pToken into buf as a NUL-terminated string and return a
 * copy of the token whose z points at buf.
 *
 * pToken - token to duplicate
 * buf    - destination buffer
 * len    - size of buf in bytes; must be larger than pToken->n so the
 *          terminator fits
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  // len is signed while the token length appears to be unsigned elsewhere in this file;
  // check the sign first so a negative len cannot pass by converting to a huge unsigned value.
  assert(pToken != NULL && buf != NULL && len > 0 && (uint32_t)len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

  SToken token = *pToken;
  token.z = buf;
  return token;
}