parTokenizer.c 18.7 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One entry of the SQL keyword table.
 * The field order matters: the table below initialises entries as {name, type}.
 */
typedef struct SKeyword {
  const char* name;  // spelling of the keyword
  uint16_t    type;  // token code (TK_*) associated with the keyword
  uint8_t     len;   // strlen(name); computed when the keyword hash table is built
} SKeyword;

S
slguan 已提交
29
// keywords in sql string
H
hzcheng 已提交
30
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
    {"ACCOUNT", TK_ACCOUNT},
    {"ACCOUNTS", TK_ACCOUNTS},
    {"ADD", TK_ADD},
    {"AGGREGATE", TK_AGGREGATE},
    {"ALL", TK_ALL},
    {"ALTER", TK_ALTER},
    {"ANALYZE", TK_ANALYZE},
    {"AND", TK_AND},
    {"APPS", TK_APPS},
    {"AS", TK_AS},
    {"ASC", TK_ASC},
    {"AT_ONCE", TK_AT_ONCE},
    {"BETWEEN", TK_BETWEEN},
    {"BINARY", TK_BINARY},
    {"BIGINT", TK_BIGINT},
X
Xiaoyu Wang 已提交
46
    // {"BLOCKS", TK_BLOCKS},
X
Xiaoyu Wang 已提交
47 48 49
    {"BNODE", TK_BNODE},
    {"BNODES", TK_BNODES},
    {"BOOL", TK_BOOL},
S
Shengliang Guan 已提交
50
    {"BUFFER", TK_BUFFER},
X
Xiaoyu Wang 已提交
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
    {"BUFSIZE", TK_BUFSIZE},
    {"BY", TK_BY},
    {"CACHE", TK_CACHE},
    {"CACHELAST", TK_CACHELAST},
    {"CAST", TK_CAST},
    {"CLUSTER", TK_CLUSTER},
    {"COLUMN", TK_COLUMN},
    {"COMMENT", TK_COMMENT},
    {"COMP", TK_COMP},
    {"COMPACT", TK_COMPACT},
    {"CONNS", TK_CONNS},
    {"CONNECTION", TK_CONNECTION},
    {"CONNECTIONS", TK_CONNECTIONS},
    {"COUNT", TK_COUNT},
    {"CREATE", TK_CREATE},
    {"DATABASE", TK_DATABASE},
    {"DATABASES", TK_DATABASES},
    {"DAYS", TK_DAYS},
    {"DBS", TK_DBS},
    {"DELAY", TK_DELAY},
    {"DESC", TK_DESC},
    {"DESCRIBE", TK_DESCRIBE},
    {"DISTINCT", TK_DISTINCT},
    {"DNODE", TK_DNODE},
    {"DNODES", TK_DNODES},
    {"DOUBLE", TK_DOUBLE},
    {"DROP", TK_DROP},
    {"EXISTS", TK_EXISTS},
    {"EXPLAIN", TK_EXPLAIN},
    {"FILE_FACTOR", TK_FILE_FACTOR},
    {"FILL", TK_FILL},
    {"FIRST", TK_FIRST},
    {"FLOAT", TK_FLOAT},
    {"FROM", TK_FROM},
    {"FSYNC", TK_FSYNC},
    {"FUNCTION", TK_FUNCTION},
    {"FUNCTIONS", TK_FUNCTIONS},
    {"GRANTS", TK_GRANTS},
    {"GROUP", TK_GROUP},
    {"HAVING", TK_HAVING},
    {"IF", TK_IF},
    {"IMPORT", TK_IMPORT},
    {"IN", TK_IN},
    {"INDEX", TK_INDEX},
    {"INDEXES", TK_INDEXES},
    {"INNER", TK_INNER},
    {"INT", TK_INT},
    {"INSERT", TK_INSERT},
    {"INTEGER", TK_INTEGER},
    {"INTERVAL", TK_INTERVAL},
    {"INTO", TK_INTO},
    {"IS", TK_IS},
    {"JOIN", TK_JOIN},
    {"JSON", TK_JSON},
    {"KEEP", TK_KEEP},
    {"KILL", TK_KILL},
    {"LAST", TK_LAST},
    {"LAST_ROW", TK_LAST_ROW},
    {"LICENCE", TK_LICENCE},
    {"LIKE", TK_LIKE},
    {"LIMIT", TK_LIMIT},
    {"LINEAR", TK_LINEAR},
    {"LOCAL", TK_LOCAL},
    {"MATCH", TK_MATCH},
    {"MAXROWS", TK_MAXROWS},
    {"MINROWS", TK_MINROWS},
    {"MINUS", TK_MINUS},
    {"MNODE", TK_MNODE},
    {"MNODES", TK_MNODES},
    {"MODIFY", TK_MODIFY},
    {"MODULES", TK_MODULES},
    {"NCHAR", TK_NCHAR},
    {"NMATCH", TK_NMATCH},
    {"NONE", TK_NONE},
    {"NOT", TK_NOT},
    {"NOW", TK_NOW},
    {"NULL", TK_NULL},
    {"NULLS", TK_NULLS},
    {"OFFSET", TK_OFFSET},
    {"ON", TK_ON},
    {"OR", TK_OR},
    {"ORDER", TK_ORDER},
    {"OUTPUTTYPE", TK_OUTPUTTYPE},
    {"PARTITION", TK_PARTITION},
    {"PASS", TK_PASS},
S
Shengliang Guan 已提交
136 137
    {"PAGES", TK_PAGES},
    {"PAGESIZE", TK_PAGESIZE},
X
Xiaoyu Wang 已提交
138 139 140 141 142 143 144 145 146 147
    {"PORT", TK_PORT},
    {"PPS", TK_PPS},
    {"PRECISION", TK_PRECISION},
    {"PRIVILEGE", TK_PRIVILEGE},
    {"PREV", TK_PREV},
    {"QNODE", TK_QNODE},
    {"QNODES", TK_QNODES},
    {"QTIME", TK_QTIME},
    {"QUERIES", TK_QUERIES},
    {"QUERY", TK_QUERY},
X
Xiaoyu Wang 已提交
148
    // {"QUORUM", TK_QUORUM},
X
Xiaoyu Wang 已提交
149 150 151 152 153 154 155 156 157 158 159
    {"RATIO", TK_RATIO},
    {"REPLICA", TK_REPLICA},
    {"RESET", TK_RESET},
    {"RETENTIONS", TK_RETENTIONS},
    {"ROLLUP", TK_ROLLUP},
    {"SCHEMA", TK_SCHEMA},
    {"SCORES", TK_SCORES},
    {"SELECT", TK_SELECT},
    {"SESSION", TK_SESSION},
    {"SET", TK_SET},
    {"SHOW", TK_SHOW},
160
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
161 162 163 164 165 166 167 168 169 170 171 172 173 174
    {"SLIDING", TK_SLIDING},
    {"SLIMIT", TK_SLIMIT},
    {"SMA", TK_SMA},
    {"SMALLINT", TK_SMALLINT},
    {"SNODE", TK_SNODE},
    {"SNODES", TK_SNODES},
    {"SOFFSET", TK_SOFFSET},
    {"STABLE", TK_STABLE},
    {"STABLES", TK_STABLES},
    {"STATE", TK_STATE},
    {"STATE_WINDOW", TK_STATE_WINDOW},
    {"STORAGE", TK_STORAGE},
    {"STREAM", TK_STREAM},
    {"STREAMS", TK_STREAMS},
X
Xiaoyu Wang 已提交
175
    // {"STREAM_MODE", TK_STREAM_MODE},
X
Xiaoyu Wang 已提交
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
    {"STRICT", TK_STRICT},
    {"SYNCDB", TK_SYNCDB},
    {"TABLE", TK_TABLE},
    {"TABLES", TK_TABLES},
    {"TAG", TK_TAG},
    {"TAGS", TK_TAGS},
    {"TBNAME", TK_TBNAME},
    {"TIMESTAMP", TK_TIMESTAMP},
    {"TIMEZONE", TK_TIMEZONE},
    {"TINYINT", TK_TINYINT},
    {"TODAY", TK_TODAY},
    {"TOPIC", TK_TOPIC},
    {"TOPICS", TK_TOPICS},
    {"TRIGGER", TK_TRIGGER},
    {"TSERIES", TK_TSERIES},
    {"TTL", TK_TTL},
    {"UNION", TK_UNION},
    {"UNSIGNED", TK_UNSIGNED},
    {"USE", TK_USE},
    {"USER", TK_USER},
    {"USERS", TK_USERS},
    {"USING", TK_USING},
    {"VALUE", TK_VALUE},
    {"VALUES", TK_VALUES},
    {"VARCHAR", TK_VARCHAR},
    {"VARIABLES", TK_VARIABLES},
    {"VERBOSE", TK_VERBOSE},
    {"VGROUPS", TK_VGROUPS},
    {"VNODES", TK_VNODES},
    {"WAL", TK_WAL},
    {"WATERMARK", TK_WATERMARK},
    {"WHERE", TK_WHERE},
    {"WINDOW_CLOSE", TK_WINDOW_CLOSE},
    {"WITH", TK_WITH},
    {"_QENDTS", TK_QENDTS},
    {"_QSTARTTS", TK_QSTARTTS},
    {"_ROWTS", TK_ROWTS},
    {"_WDURATION", TK_WDURATION},
    {"_WENDTS", TK_WENDTS},
    {"_WSTARTTS", TK_WSTARTTS},
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
279
    // {"PARTITIONS",   TK_PARTITIONS},
280
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
281 282 283 284 285 286 287 288 289 290 291 292 293 294
};

/*
 * Per-character flag for the 7-bit ASCII range: isIdChar[c] is 1 for the characters that
 * may appear inside an identifier or keyword ('0'-'9', 'A'-'Z', 'a'-'z' and '_') and 0 for
 * everything else. The table has exactly 128 entries, so it must only be indexed with
 * values below 0x80.
 */
static const char isIdChar[128] = {
    /* 0x00 - 0x0F: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1F: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2F: space and punctuation */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 - 0x3F: '0'..'9', then punctuation */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
    /* 0x40 - 0x4F: '@', then 'A'..'O' */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5F: 'P'..'Z', punctuation, then '_' */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
    /* 0x60 - 0x6F: '`', then 'a'..'o' */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7F: 'p'..'z', punctuation, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
};

H
Haojun Liao 已提交
295
static void* keywordHashTable = NULL;
H
hzcheng 已提交
296

S
TD-1057  
Shengliang Guan 已提交
297
static void doInitKeywordsTable(void) {
298
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
299

H
Haojun Liao 已提交
300
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
301
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
302
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
303
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
304
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
305
  }
306 307
}

wafwerar's avatar
wafwerar 已提交
308
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
309

310
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
311
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
312

H
hjxilinx 已提交
313
  char key[512] = {0};
X
Xiaoyu Wang 已提交
314
  if (n > tListLen(key)) {  // too long token, can not be any other token type
315
    return TK_NK_ID;
316
  }
X
Xiaoyu Wang 已提交
317

H
hzcheng 已提交
318 319
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
320
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
321 322 323 324 325
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
326
  if (keywordHashTable == NULL) {
327
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
328
  }
H
Haojun Liao 已提交
329

H
Haojun Liao 已提交
330
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
331
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
332 333
}

H
huili 已提交
334
/*
335 336 337
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
338
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
339
  uint32_t i;
H
hzcheng 已提交
340 341 342 343 344 345 346 347
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
348
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
349 350 351
      return i;
    }
    case ':': {
352
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
353 354 355 356 357 358
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
359
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
360
        return i;
361 362 363
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
364
      }
X
Xiaoyu Wang 已提交
365
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
366 367 368
      return 1;
    }
    case '(': {
369
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
370 371 372
      return 1;
    }
    case ')': {
373
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
374 375 376
      return 1;
    }
    case ';': {
377
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
378 379 380
      return 1;
    }
    case '+': {
381
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
382 383 384
      return 1;
    }
    case '*': {
385
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
386 387 388 389
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
390
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
391 392 393 394 395
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
396
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
397 398 399
      return i;
    }
    case '%': {
400
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
401 402 403
      return 1;
    }
    case '=': {
404
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
405 406 407 408
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
409
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
410 411
        return 2;
      } else if (z[1] == '>') {
412
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
413 414
        return 2;
      } else if (z[1] == '<') {
415
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
416 417
        return 2;
      } else {
418
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
419 420 421 422 423
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
424
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
425 426
        return 2;
      } else if (z[1] == '>') {
427
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
428 429
        return 2;
      } else {
430
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
431 432 433 434 435
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
436
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
437 438
        return 2;
      } else {
439
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
440 441 442 443 444
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
445
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
446 447
        return 1;
      } else {
448
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
449 450 451 452
        return 2;
      }
    }
    case ',': {
453
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
454 455 456
      return 1;
    }
    case '&': {
457
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
458 459 460
      return 1;
    }
    case '~': {
461
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
462 463
      return 1;
    }
S
slguan 已提交
464
    case '?': {
465
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
466 467
      return 1;
    }
468
    case '`':
H
hzcheng 已提交
469 470
    case '\'':
    case '"': {
S
slguan 已提交
471 472
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
473
      for (i = 1; z[i]; i++) {
X
Xiaoyu Wang 已提交
474
        if (z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
475 476 477
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
478

479
        if (z[i] == delim) {
H
hzcheng 已提交
480 481 482
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
483
            strEnd = true;
H
hzcheng 已提交
484 485 486 487
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
488

H
hzcheng 已提交
489
      if (z[i]) i++;
H
huili 已提交
490

S
slguan 已提交
491
      if (strEnd) {
X
Xiaoyu Wang 已提交
492
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
493 494
        return i;
      }
H
huili 已提交
495

S
slguan 已提交
496
      break;
H
hzcheng 已提交
497 498
    }
    case '.': {
S
slguan 已提交
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

516
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
517 518
        return i;
      } else {
519
        *tokenId = TK_NK_DOT;
S
slguan 已提交
520 521 522 523 524 525 526
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
527
      if (next == 'b') {  // bin number
528
        *tokenId = TK_NK_BIN;
S
slguan 已提交
529 530 531 532 533 534 535 536
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
537
      } else if (next == 'x') {  // hex number
538
        *tokenId = TK_NK_HEX;
S
slguan 已提交
539 540 541 542 543 544 545 546 547
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
548 549 550 551 552 553 554 555 556 557
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
558
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
559 560 561
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
562
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
563 564 565
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
566
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
567
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
568 569 570 571 572 573 574 575 576 577
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
578
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
579 580 581 582
        seg++;
      }

      if (seg == 4) {  // ip address
583
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
584 585 586 587 588 589 590 591 592
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
593
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
594 595 596 597 598 599
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
600
      *tokenId = TK_NK_ID;
H
hzcheng 已提交
601 602 603 604 605 606
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
607
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
608 609 610
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
611
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
612 613 614 615
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
616
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
617 618
        break;
      }
X
Xiaoyu Wang 已提交
619
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
620
      }
621
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
622 623 624 625
      return i;
    }
  }

626
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
627 628 629
  return 0;
}

X
Xiaoyu Wang 已提交
630 631 632
/*
 * Replace one token inside a heap-allocated string with different text.
 *
 * str       in/out: *str is the NUL-terminated string that contains the token. On success
 *           it is replaced by a newly allocated string and the old buffer is released.
 * token     the token to replace; token->z must point into *str and token->n is its length
 * newToken  NUL-terminated replacement text
 *
 * Returns a token whose z/n describe the replacement text inside the new string; all other
 * fields are zero. If the new buffer cannot be allocated, *str is left untouched (and is
 * not released) and a zeroed token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  SToken ntoken = {0};
  char*  src = *str;

  const char* tail = token->z + token->n;       // text that follows the replaced token
  size_t      headLen = (size_t)(token->z - src);  // bytes that precede the replaced token
  size_t      newLen = strlen(newToken);
  size_t      tailLen = strlen(tail);

  char* dst = taosMemoryCalloc(1, headLen + newLen + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  memcpy(dst, src, headLen);
  memcpy(dst + headLen, newToken, newLen);
  memcpy(dst + headLen + newLen, tail, tailLen + 1);  // +1 copies the terminating NUL

  *str = dst;
  ntoken.n = (uint32_t)newLen;
  ntoken.z = dst + headLen;

  taosMemoryFreeClear(src);

  return ntoken;
}

651
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
652
  SToken t0 = {0};
S
slguan 已提交
653

H
hzcheng 已提交
654 655
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
656 657
    t0.n = 0;
    return t0;
H
hzcheng 已提交
658 659
  }

660
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
661 662 663
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
664
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
665
    char    t = str[*i];
H
Haojun Liao 已提交
666 667 668 669
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
670
      }
X
Xiaoyu Wang 已提交
671

H
Haojun Liao 已提交
672
      t = str[++(*i)];
S
slguan 已提交
673
    }
H
hzcheng 已提交
674

675
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
676
    break;
S
slguan 已提交
677

H
Haojun Liao 已提交
678 679
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
680
    bool ignore = false;
S
slguan 已提交
681 682
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
683
        ignore = true;
S
slguan 已提交
684 685 686 687
        break;
      }
    }

H
Haojun Liao 已提交
688
    if (!ignore) {
S
slguan 已提交
689 690
      break;
    }
H
Haojun Liao 已提交
691
#endif
H
hzcheng 已提交
692 693
  }

694
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
695 696 697 698 699 700 701 702 703
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
704
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
705 706

    // only id and string are valid
707
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
708
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
709 710 711 712 713 714 715 716 717
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
718
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
719
      len = tGetToken(&str[*i + t0.n], &type);
720
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
721 722 723
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
724 725 726
    }
  }

X
Xiaoyu Wang 已提交
727
  t0.z = (char*)str + (*i);
S
slguan 已提交
728 729 730
  *i += t0.n;

  return t0;
H
hzcheng 已提交
731 732
}

X
Xiaoyu Wang 已提交
733
// Returns true whenever tKeywordCode() classifies the len-byte word at z as anything other
// than a plain identifier (TK_NK_ID).
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode((char*)z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
734 735

// Detach the keyword hash table from the global pointer and release it. The table is only
// cleaned up when the compare-and-exchange confirms the global still held the value read here.
void taosCleanupKeywordsTable() {
  void* table = keywordHashTable;
  if (table == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, table, 0) != table) {
    return;
  }

  taosHashCleanup(table);
}
741

H
Haojun Liao 已提交
742
/*
 * Copy the text of pToken into buf, NUL-terminate it, and return a copy of the token whose
 * z points at buf. buf must hold more than pToken->n bytes (asserted).
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL && len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = '\0';

  SToken dup = *pToken;
  dup.z = buf;
  return dup;
}