parTokenizer.c 20.3 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// Descriptor of one SQL keyword. Every descriptor in keywordTable is registered
// in a hash table (keywordHashTable) keyed by the keyword text.
typedef struct SKeyword {
  const char* name;  // keyword text as written in keywordTable
  uint16_t    type;  // token type code (a TK_* value)
  uint8_t     len;   // byte length of name; computed by doInitKeywordsTable()
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
    {"ACCOUNT",       TK_ACCOUNT},
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
    {"ALL",           TK_ALL},
    {"ALTER",         TK_ALTER},
    {"ANALYZE",       TK_ANALYZE},
    {"AND",           TK_AND},
    {"APPS",          TK_APPS},
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"AT_ONCE",       TK_AT_ONCE},
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
    {"BOOL",          TK_BOOL},
    {"BUFFER",        TK_BUFFER},
    {"BUFSIZE",       TK_BUFSIZE},
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
    {"CAST",          TK_CAST},
    {"CLUSTER",       TK_CLUSTER},
    {"COLUMN",        TK_COLUMN},
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
    {"COUNT",         TK_COUNT},
    {"CREATE",        TK_CREATE},
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
    {"DBS",           TK_DBS},
    {"DELAY",         TK_DELAY},
    {"DESC",          TK_DESC},
    {"DESCRIBE",      TK_DESCRIBE},
    {"DISTINCT",      TK_DISTINCT},
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
    {"DOUBLE",        TK_DOUBLE},
    {"DROP",          TK_DROP},
    {"EXISTS",        TK_EXISTS},
    {"EXPLAIN",       TK_EXPLAIN},
    {"FILE_FACTOR",   TK_FILE_FACTOR},
    {"FILL",          TK_FILL},
    {"FIRST",         TK_FIRST},
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
    {"FUNCTION",      TK_FUNCTION},
    {"FUNCTIONS",     TK_FUNCTIONS},
88
    {"GRANT",         TK_GRANT},
X
Xiaoyu Wang 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
    {"GRANTS",        TK_GRANTS},
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
    {"INDEX",         TK_INDEX},
    {"INDEXES",       TK_INDEXES},
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
    {"KILL",          TK_KILL},
    {"LAST",          TK_LAST},
    {"LAST_ROW",      TK_LAST_ROW},
    {"LICENCE",       TK_LICENCE},
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
    {"LOCAL",         TK_LOCAL},
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
    {"MNODE",         TK_MNODE},
    {"MNODES",        TK_MNODES},
    {"MODIFY",        TK_MODIFY},
    {"MODULES",       TK_MODULES},
    {"NCHAR",         TK_NCHAR},
    {"NEXT",          TK_NEXT},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
    {"NULLS",         TK_NULLS},
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
    {"PARTITION",     TK_PARTITION},
    {"PASS",          TK_PASS},
    {"PAGES",         TK_PAGES},
    {"PAGESIZE",      TK_PAGESIZE},
    {"PORT",          TK_PORT},
    {"PPS",           TK_PPS},
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
    {"QNODE",         TK_QNODE},
    {"QNODES",        TK_QNODES},
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
    {"QUERY",         TK_QUERY},
    {"RATIO",         TK_RATIO},
151 152
    {"READ",          TK_READ},
    {"RENAME",        TK_RENAME},
X
Xiaoyu Wang 已提交
153 154 155
    {"REPLICA",       TK_REPLICA},
    {"RESET",         TK_RESET},
    {"RETENTIONS",    TK_RETENTIONS},
156
    {"REVOKE",        TK_REVOKE},
X
Xiaoyu Wang 已提交
157 158 159 160 161 162 163
    {"ROLLUP",        TK_ROLLUP},
    {"SCHEMA",        TK_SCHEMA},
    {"SCORES",        TK_SCORES},
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
    {"SET",           TK_SET},
    {"SHOW",          TK_SHOW},
164
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
    {"SOFFSET",       TK_SOFFSET},
    {"STABLE",        TK_STABLE},
    {"STABLES",       TK_STABLES},
    {"STATE",         TK_STATE},
    {"STATE_WINDOW",  TK_STATE_WINDOW},
    {"STORAGE",       TK_STORAGE},
    {"STREAM",        TK_STREAM},
    {"STREAMS",       TK_STREAMS},
    {"STRICT",        TK_STRICT},
    {"SYNCDB",        TK_SYNCDB},
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
    {"TAG",           TK_TAG},
    {"TAGS",          TK_TAGS},
    {"TBNAME",        TK_TBNAME},
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TIMEZONE",      TK_TIMEZONE},
    {"TINYINT",       TK_TINYINT},
189
    {"TO",            TK_TO},
X
Xiaoyu Wang 已提交
190 191 192
    {"TODAY",         TK_TODAY},
    {"TOPIC",         TK_TOPIC},
    {"TOPICS",        TK_TOPICS},
193 194
    {"TRANSACTION",   TK_TRANSACTION},
    {"TRANSACTIONS",  TK_TRANSACTIONS},
X
Xiaoyu Wang 已提交
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
    {"TRIGGER",       TK_TRIGGER},
    {"TSERIES",       TK_TSERIES},
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
    {"USING",         TK_USING},
    {"VALUE",         TK_VALUE},
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
    {"VARIABLES",     TK_VARIABLES},
    {"VERBOSE",       TK_VERBOSE},
    {"VGROUPS",       TK_VGROUPS},
    {"VNODES",        TK_VNODES},
    {"WAL",           TK_WAL},
    {"WATERMARK",     TK_WATERMARK},
    {"WHERE",         TK_WHERE},
    {"WINDOW_CLOSE",  TK_WINDOW_CLOSE},
    {"WITH",          TK_WITH},
216
    {"WRITE",         TK_WRITE},
X
Xiaoyu Wang 已提交
217 218 219 220 221 222
    {"_QENDTS",       TK_QENDTS},
    {"_QSTARTTS",     TK_QSTARTTS},
    {"_ROWTS",        TK_ROWTS},
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
    {"_WSTARTTS",     TK_WSTARTTS},
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
286
    // {"PARTITIONS",   TK_PARTITIONS},
287
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
288
};
X
Xiaoyu Wang 已提交
289
// clang-format on
H
hzcheng 已提交
290 291 292 293 294 295 296 297 298 299 300 301 302

// Lookup table indexed by a 7-bit ASCII code: a non-zero entry marks a character
// that may appear in an identifier (digits '0'-'9', letters 'A'-'Z' / 'a'-'z' and '_').
// The table has only 128 entries, so a byte with the high bit set (>= 0x80) must be
// rejected by the caller BEFORE it is used as an index.
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
303
// Hash table that maps keyword text to a pointer to its keywordTable entry.
// It is created by doInitKeywordsTable() and reset to NULL by
// taosCleanupKeywordsTable(); readers must cope with it being NULL.
static void* keywordHashTable = NULL;
H
hzcheng 已提交
304

S
TD-1057  
Shengliang Guan 已提交
305
static void doInitKeywordsTable(void) {
306
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
307

H
Haojun Liao 已提交
308
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
309
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
310
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
311
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
312
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
313
  }
314 315
}

wafwerar's avatar
wafwerar 已提交
316
// Once-control handed to taosThreadOnce() by tKeywordCode() to trigger doInitKeywordsTable().
// NOTE(review): presumably this has pthread_once semantics (initializer runs a single time) - confirm.
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
317

318
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
319
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
320

H
hjxilinx 已提交
321
  char key[512] = {0};
X
Xiaoyu Wang 已提交
322
  if (n > tListLen(key)) {  // too long token, can not be any other token type
323
    return TK_NK_ID;
324
  }
X
Xiaoyu Wang 已提交
325

H
hzcheng 已提交
326 327
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
328
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
329 330 331 332 333
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
334
  if (keywordHashTable == NULL) {
335
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
336
  }
H
Haojun Liao 已提交
337

H
Haojun Liao 已提交
338
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
339
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
340 341
}

H
huili 已提交
342
/*
343 344 345
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
346
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
347
  uint32_t i;
H
hzcheng 已提交
348 349 350 351 352 353 354 355
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
356
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
357 358 359
      return i;
    }
    case ':': {
360
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
361 362 363 364 365 366
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
367
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
368
        return i;
369 370 371
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
372
      }
X
Xiaoyu Wang 已提交
373
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
374 375 376
      return 1;
    }
    case '(': {
377
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
378 379 380
      return 1;
    }
    case ')': {
381
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
382 383 384
      return 1;
    }
    case ';': {
385
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
386 387 388
      return 1;
    }
    case '+': {
389
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
390 391 392
      return 1;
    }
    case '*': {
393
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
394 395 396 397
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
398
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
399 400 401 402 403
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
404
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
405 406 407
      return i;
    }
    case '%': {
408
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
409 410 411
      return 1;
    }
    case '=': {
412
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
413 414 415 416
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
417
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
418 419
        return 2;
      } else if (z[1] == '>') {
420
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
421 422
        return 2;
      } else if (z[1] == '<') {
423
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
424 425
        return 2;
      } else {
426
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
427 428 429 430 431
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
432
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
433 434
        return 2;
      } else if (z[1] == '>') {
435
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
436 437
        return 2;
      } else {
438
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
439 440 441 442 443
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
444
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
445 446
        return 2;
      } else {
447
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
448 449 450 451 452
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
453
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
454 455
        return 1;
      } else {
456
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
457 458 459 460
        return 2;
      }
    }
    case ',': {
461
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
462 463 464
      return 1;
    }
    case '&': {
465
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
466 467 468
      return 1;
    }
    case '~': {
469
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
470 471
      return 1;
    }
S
slguan 已提交
472
    case '?': {
473
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
474 475
      return 1;
    }
476
    case '`':
H
hzcheng 已提交
477 478
    case '\'':
    case '"': {
S
slguan 已提交
479 480
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
481
      for (i = 1; z[i]; i++) {
482
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
483 484 485
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
486

487
        if (z[i] == delim) {
H
hzcheng 已提交
488 489 490
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
491
            strEnd = true;
H
hzcheng 已提交
492 493 494 495
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
496

H
hzcheng 已提交
497
      if (z[i]) i++;
H
huili 已提交
498

S
slguan 已提交
499
      if (strEnd) {
X
Xiaoyu Wang 已提交
500
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
501 502
        return i;
      }
H
huili 已提交
503

S
slguan 已提交
504
      break;
H
hzcheng 已提交
505 506
    }
    case '.': {
S
slguan 已提交
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

524
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
525 526
        return i;
      } else {
527
        *tokenId = TK_NK_DOT;
S
slguan 已提交
528 529 530 531 532 533 534
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
535
      if (next == 'b') {  // bin number
536
        *tokenId = TK_NK_BIN;
S
slguan 已提交
537 538 539 540 541 542 543 544
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
545
      } else if (next == 'x') {  // hex number
546
        *tokenId = TK_NK_HEX;
S
slguan 已提交
547 548 549 550 551 552 553 554 555
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
556 557 558 559 560 561 562 563 564 565
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
566
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
567 568 569
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
570
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
571 572 573
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
574
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
575
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
576 577 578 579 580 581 582 583 584 585
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
586
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
587 588 589 590
        seg++;
      }

      if (seg == 4) {  // ip address
591
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
592
        return i;
X
Xiaoyu Wang 已提交
593 594
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
595 596 597 598 599 600 601 602
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
603
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
604 605 606 607 608 609
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
610
      *tokenId = TK_NK_ID;
H
hzcheng 已提交
611 612 613 614 615 616
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
617
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
618 619 620
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
621
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
622 623 624 625
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
626
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
627 628
        break;
      }
X
Xiaoyu Wang 已提交
629
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
630
      }
631
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
632 633 634 635
      return i;
    }
  }

636
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
637 638 639
  return 0;
}

X
Xiaoyu Wang 已提交
640 641 642
/*
 * Replace the text covered by token inside *str with newToken.
 *
 * str       in/out: string that contains the token. On success the old buffer
 *           is released with taosMemoryFreeClear() and *str points to a newly
 *           allocated string.
 * token     token to replace; token->z must point into *str.
 * newToken  NUL-terminated replacement text.
 *
 * Returns a token whose z/n describe the replacement inside the new string
 * (type is left as 0). If the allocation fails, *str is left untouched and a
 * zeroed token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  SToken ntoken = {0};
  char*  src = *str;

  // the result is: <prefix before the token> + newToken + <tail after the token>
  const char* tail = token->z + token->n;
  size_t      nsize = strlen(newToken);
  size_t      tailLen = strlen(tail);
  int32_t     bsize = (int32_t)((uint64_t)token->z - (uint64_t)src);
  int32_t     size = bsize + (int32_t)nsize + (int32_t)tailLen + 1;

  char* dst = taosMemoryCalloc(1, size);
  if (dst == NULL) {
    return ntoken;
  }

  memcpy(dst, src, (size_t)bsize);
  memcpy(dst + bsize, newToken, nsize);
  memcpy(dst + bsize + nsize, tail, tailLen);  // the zeroed buffer supplies the terminating NUL

  ntoken.n = (uint32_t)nsize;
  ntoken.z = dst + bsize;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

661
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
662
  SToken t0 = {0};
S
slguan 已提交
663

H
hzcheng 已提交
664 665
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
666 667
    t0.n = 0;
    return t0;
H
hzcheng 已提交
668 669
  }

670
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
671 672 673
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
674
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
675
    char    t = str[*i];
H
Haojun Liao 已提交
676 677 678 679
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
680
      }
X
Xiaoyu Wang 已提交
681

H
Haojun Liao 已提交
682
      t = str[++(*i)];
S
slguan 已提交
683
    }
H
hzcheng 已提交
684

685
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
686
    break;
S
slguan 已提交
687

H
Haojun Liao 已提交
688 689
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
690
    bool ignore = false;
S
slguan 已提交
691 692
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
693
        ignore = true;
S
slguan 已提交
694 695 696 697
        break;
      }
    }

H
Haojun Liao 已提交
698
    if (!ignore) {
S
slguan 已提交
699 700
      break;
    }
H
Haojun Liao 已提交
701
#endif
H
hzcheng 已提交
702 703
  }

704
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
705 706 707 708 709 710 711 712 713
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
714
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
715 716

    // only id and string are valid
717
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
718
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
719 720 721 722 723 724 725 726 727
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
728
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
729
      len = tGetToken(&str[*i + t0.n], &type);
730
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
731 732 733
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
734 735 736
    }
  }

X
Xiaoyu Wang 已提交
737
  t0.z = (char*)str + (*i);
S
slguan 已提交
738 739 740
  *i += t0.n;

  return t0;
H
hzcheng 已提交
741 742
}

X
Xiaoyu Wang 已提交
743
// Returns true unless tKeywordCode() classifies the len-byte word at z as a plain
// identifier (TK_NK_ID); note that TK_NK_ILLEGAL therefore also yields true.
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
744 745

// Release the keyword hash table. The global pointer is swapped to NULL with a
// compare-and-exchange, and only the caller whose exchange succeeded frees it.
void taosCleanupKeywordsTable() {
  void* table = keywordHashTable;
  if (table == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, table, 0) == table) {
    taosHashCleanup(table);
  }
}
751

H
Haojun Liao 已提交
752
/*
 * Copy the text of pToken into buf as a NUL-terminated string and return a
 * copy of the token whose z points at buf.
 *
 * buf must provide more than pToken->n bytes (len > pToken->n), which is
 * checked with assert().
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL && len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = '\0';

  SToken dup = *pToken;
  dup.z = buf;
  return dup;
}