parTokenizer.c 18.6 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One entry of the SQL keyword table.
 * keywordTable initialises only name and type; len is computed from name by
 * doInitKeywordsTable() before the entry is registered in the hash table.
 */
typedef struct SKeyword {
  const char* name;  // keyword spelling, written in upper case in keywordTable
  uint16_t    type;  // token code reported for this keyword (a TK_* constant)
  uint8_t     len;   // strlen(name), filled in at initialisation time
} SKeyword;

S
slguan 已提交
29
// keywords in sql string
H
hzcheng 已提交
30
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
    {"ACCOUNT", TK_ACCOUNT},
    {"ACCOUNTS", TK_ACCOUNTS},
    {"ADD", TK_ADD},
    {"AGGREGATE", TK_AGGREGATE},
    {"ALL", TK_ALL},
    {"ALTER", TK_ALTER},
    {"ANALYZE", TK_ANALYZE},
    {"AND", TK_AND},
    {"APPS", TK_APPS},
    {"AS", TK_AS},
    {"ASC", TK_ASC},
    {"AT_ONCE", TK_AT_ONCE},
    {"BETWEEN", TK_BETWEEN},
    {"BINARY", TK_BINARY},
    {"BIGINT", TK_BIGINT},
    {"BLOCKS", TK_BLOCKS},
    {"BNODE", TK_BNODE},
    {"BNODES", TK_BNODES},
    {"BOOL", TK_BOOL},
    {"BUFSIZE", TK_BUFSIZE},
    {"BY", TK_BY},
    {"CACHE", TK_CACHE},
    {"CACHELAST", TK_CACHELAST},
    {"CAST", TK_CAST},
    {"CLUSTER", TK_CLUSTER},
    {"COLUMN", TK_COLUMN},
    {"COMMENT", TK_COMMENT},
    {"COMP", TK_COMP},
    {"COMPACT", TK_COMPACT},
    {"CONNS", TK_CONNS},
    {"CONNECTION", TK_CONNECTION},
    {"CONNECTIONS", TK_CONNECTIONS},
    {"COUNT", TK_COUNT},
    {"CREATE", TK_CREATE},
    {"DATABASE", TK_DATABASE},
    {"DATABASES", TK_DATABASES},
    {"DAYS", TK_DAYS},
    {"DBS", TK_DBS},
    {"DELAY", TK_DELAY},
    {"DESC", TK_DESC},
    {"DESCRIBE", TK_DESCRIBE},
    {"DISTINCT", TK_DISTINCT},
    {"DNODE", TK_DNODE},
    {"DNODES", TK_DNODES},
    {"DOUBLE", TK_DOUBLE},
    {"DROP", TK_DROP},
    {"EXISTS", TK_EXISTS},
    {"EXPLAIN", TK_EXPLAIN},
    {"FILE_FACTOR", TK_FILE_FACTOR},
    {"FILL", TK_FILL},
    {"FIRST", TK_FIRST},
    {"FLOAT", TK_FLOAT},
    {"FROM", TK_FROM},
    {"FSYNC", TK_FSYNC},
    {"FUNCTION", TK_FUNCTION},
    {"FUNCTIONS", TK_FUNCTIONS},
    {"GRANTS", TK_GRANTS},
    {"GROUP", TK_GROUP},
    {"HAVING", TK_HAVING},
    {"IF", TK_IF},
    {"IMPORT", TK_IMPORT},
    {"IN", TK_IN},
    {"INDEX", TK_INDEX},
    {"INDEXES", TK_INDEXES},
    {"INNER", TK_INNER},
    {"INT", TK_INT},
    {"INSERT", TK_INSERT},
    {"INTEGER", TK_INTEGER},
    {"INTERVAL", TK_INTERVAL},
    {"INTO", TK_INTO},
    {"IS", TK_IS},
    {"JOIN", TK_JOIN},
    {"JSON", TK_JSON},
    {"KEEP", TK_KEEP},
    {"KILL", TK_KILL},
    {"LAST", TK_LAST},
    {"LAST_ROW", TK_LAST_ROW},
    {"LICENCE", TK_LICENCE},
    {"LIKE", TK_LIKE},
    {"LIMIT", TK_LIMIT},
    {"LINEAR", TK_LINEAR},
    {"LOCAL", TK_LOCAL},
    {"MATCH", TK_MATCH},
    {"MAXROWS", TK_MAXROWS},
    {"MINROWS", TK_MINROWS},
    {"MINUS", TK_MINUS},
    {"MNODE", TK_MNODE},
    {"MNODES", TK_MNODES},
    {"MODIFY", TK_MODIFY},
    {"MODULES", TK_MODULES},
    {"NCHAR", TK_NCHAR},
    {"NMATCH", TK_NMATCH},
    {"NONE", TK_NONE},
    {"NOT", TK_NOT},
    {"NOW", TK_NOW},
    {"NULL", TK_NULL},
    {"NULLS", TK_NULLS},
    {"OFFSET", TK_OFFSET},
    {"ON", TK_ON},
    {"OR", TK_OR},
    {"ORDER", TK_ORDER},
    {"OUTPUTTYPE", TK_OUTPUTTYPE},
    {"PARTITION", TK_PARTITION},
    {"PASS", TK_PASS},
    {"PORT", TK_PORT},
    {"PPS", TK_PPS},
    {"PRECISION", TK_PRECISION},
    {"PRIVILEGE", TK_PRIVILEGE},
    {"PREV", TK_PREV},
    {"QNODE", TK_QNODE},
    {"QNODES", TK_QNODES},
    {"QTIME", TK_QTIME},
    {"QUERIES", TK_QUERIES},
    {"QUERY", TK_QUERY},
    {"QUORUM", TK_QUORUM},
    {"RATIO", TK_RATIO},
    {"REPLICA", TK_REPLICA},
    {"RESET", TK_RESET},
    {"RETENTIONS", TK_RETENTIONS},
    {"ROLLUP", TK_ROLLUP},
    {"SCHEMA", TK_SCHEMA},
    {"SCORES", TK_SCORES},
    {"SELECT", TK_SELECT},
    {"SESSION", TK_SESSION},
    {"SET", TK_SET},
    {"SHOW", TK_SHOW},
157
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
    {"SLIDING", TK_SLIDING},
    {"SLIMIT", TK_SLIMIT},
    {"SMA", TK_SMA},
    {"SMALLINT", TK_SMALLINT},
    {"SNODE", TK_SNODE},
    {"SNODES", TK_SNODES},
    {"SOFFSET", TK_SOFFSET},
    {"STABLE", TK_STABLE},
    {"STABLES", TK_STABLES},
    {"STATE", TK_STATE},
    {"STATE_WINDOW", TK_STATE_WINDOW},
    {"STORAGE", TK_STORAGE},
    {"STREAM", TK_STREAM},
    {"STREAMS", TK_STREAMS},
    {"STREAM_MODE", TK_STREAM_MODE},
    {"STRICT", TK_STRICT},
    {"SYNCDB", TK_SYNCDB},
    {"TABLE", TK_TABLE},
    {"TABLES", TK_TABLES},
    {"TAG", TK_TAG},
    {"TAGS", TK_TAGS},
    {"TBNAME", TK_TBNAME},
    {"TIMESTAMP", TK_TIMESTAMP},
    {"TIMEZONE", TK_TIMEZONE},
    {"TINYINT", TK_TINYINT},
    {"TODAY", TK_TODAY},
    {"TOPIC", TK_TOPIC},
    {"TOPICS", TK_TOPICS},
    {"TRIGGER", TK_TRIGGER},
    {"TSERIES", TK_TSERIES},
    {"TTL", TK_TTL},
    {"UNION", TK_UNION},
    {"UNSIGNED", TK_UNSIGNED},
    {"USE", TK_USE},
    {"USER", TK_USER},
    {"USERS", TK_USERS},
    {"USING", TK_USING},
    {"VALUE", TK_VALUE},
    {"VALUES", TK_VALUES},
    {"VARCHAR", TK_VARCHAR},
    {"VARIABLES", TK_VARIABLES},
    {"VERBOSE", TK_VERBOSE},
    {"VGROUPS", TK_VGROUPS},
    {"VNODES", TK_VNODES},
    {"WAL", TK_WAL},
    {"WATERMARK", TK_WATERMARK},
    {"WHERE", TK_WHERE},
    {"WINDOW_CLOSE", TK_WINDOW_CLOSE},
    {"WITH", TK_WITH},
    {"_QENDTS", TK_QENDTS},
    {"_QSTARTTS", TK_QSTARTTS},
    {"_ROWTS", TK_ROWTS},
    {"_WDURATION", TK_WDURATION},
    {"_WENDTS", TK_WENDTS},
    {"_WSTARTTS", TK_WSTARTTS},
213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
276
    // {"PARTITIONS",   TK_PARTITIONS},
277
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
278 279 280 281 282 283 284 285 286 287 288 289 290 291
};

/*
 * Identifier-character lookup, indexed by character code.
 * An entry is 1 for the characters '0'-'9', 'A'-'Z', 'a'-'z' and '_', and 0
 * for everything else. The table has exactly 128 entries (0x00-0x7F), so it
 * must not be indexed with a byte whose high bit (0x80) is set.
 */
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
292
// Hash table keyed by keyword name; each value is a pointer to the matching keywordTable entry.
// Created by doInitKeywordsTable() and reset to NULL by taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
293

S
TD-1057  
Shengliang Guan 已提交
294
static void doInitKeywordsTable(void) {
295
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
296

H
Haojun Liao 已提交
297
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
298
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
299
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
300
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
301
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
302
  }
303 304
}

wafwerar's avatar
wafwerar 已提交
305
// Once-control handed to taosThreadOnce() together with doInitKeywordsTable() in tKeywordCode().
// NOTE(review): presumably this guarantees the table is built a single time per process - confirm.
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
306

307
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
308
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
309

H
hjxilinx 已提交
310
  char key[512] = {0};
X
Xiaoyu Wang 已提交
311
  if (n > tListLen(key)) {  // too long token, can not be any other token type
312
    return TK_NK_ID;
313
  }
X
Xiaoyu Wang 已提交
314

H
hzcheng 已提交
315 316
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
317
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
318 319 320 321 322
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
323
  if (keywordHashTable == NULL) {
324
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
325
  }
H
Haojun Liao 已提交
326

H
Haojun Liao 已提交
327
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
328
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
329 330
}

H
huili 已提交
331
/*
332 333 334
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
335
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
336
  uint32_t i;
H
hzcheng 已提交
337 338 339 340 341 342 343 344
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
345
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
346 347 348
      return i;
    }
    case ':': {
349
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
350 351 352 353 354 355
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
356
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
357
        return i;
358 359 360
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
361
      }
X
Xiaoyu Wang 已提交
362
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
363 364 365
      return 1;
    }
    case '(': {
366
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
367 368 369
      return 1;
    }
    case ')': {
370
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
371 372 373
      return 1;
    }
    case ';': {
374
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
375 376 377
      return 1;
    }
    case '+': {
378
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
379 380 381
      return 1;
    }
    case '*': {
382
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
383 384 385 386
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
387
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
388 389 390 391 392
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
393
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
394 395 396
      return i;
    }
    case '%': {
397
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
398 399 400
      return 1;
    }
    case '=': {
401
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
402 403 404 405
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
406
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
407 408
        return 2;
      } else if (z[1] == '>') {
409
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
410 411
        return 2;
      } else if (z[1] == '<') {
412
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
413 414
        return 2;
      } else {
415
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
416 417 418 419 420
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
421
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
422 423
        return 2;
      } else if (z[1] == '>') {
424
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
425 426
        return 2;
      } else {
427
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
428 429 430 431 432
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
433
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
434 435
        return 2;
      } else {
436
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
437 438 439 440 441
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
442
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
443 444
        return 1;
      } else {
445
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
446 447 448 449
        return 2;
      }
    }
    case ',': {
450
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
451 452 453
      return 1;
    }
    case '&': {
454
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
455 456 457
      return 1;
    }
    case '~': {
458
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
459 460
      return 1;
    }
S
slguan 已提交
461
    case '?': {
462
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
463 464
      return 1;
    }
465
    case '`':
H
hzcheng 已提交
466 467
    case '\'':
    case '"': {
S
slguan 已提交
468 469
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
470
      for (i = 1; z[i]; i++) {
X
Xiaoyu Wang 已提交
471
        if (z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
472 473 474
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
475

476
        if (z[i] == delim) {
H
hzcheng 已提交
477 478 479
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
480
            strEnd = true;
H
hzcheng 已提交
481 482 483 484
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
485

H
hzcheng 已提交
486
      if (z[i]) i++;
H
huili 已提交
487

S
slguan 已提交
488
      if (strEnd) {
X
Xiaoyu Wang 已提交
489
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
490 491
        return i;
      }
H
huili 已提交
492

S
slguan 已提交
493
      break;
H
hzcheng 已提交
494 495
    }
    case '.': {
S
slguan 已提交
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

513
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
514 515
        return i;
      } else {
516
        *tokenId = TK_NK_DOT;
S
slguan 已提交
517 518 519 520 521 522 523
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
524
      if (next == 'b') {  // bin number
525
        *tokenId = TK_NK_BIN;
S
slguan 已提交
526 527 528 529 530 531 532 533
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
534
      } else if (next == 'x') {  // hex number
535
        *tokenId = TK_NK_HEX;
S
slguan 已提交
536 537 538 539 540 541 542 543 544
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
545 546 547 548 549 550 551 552 553 554
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
555
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
556 557 558
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
559
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
560 561 562
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
563
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
564
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
565 566 567 568 569 570 571 572 573 574
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
575
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
576 577 578 579
        seg++;
      }

      if (seg == 4) {  // ip address
580
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
581 582 583 584 585 586 587 588 589
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
590
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
591 592 593 594 595 596
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
597
      *tokenId = TK_NK_ID;
H
hzcheng 已提交
598 599 600 601 602 603
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
604
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
605 606 607
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
608
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
609 610 611 612
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
613
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
614 615
        break;
      }
X
Xiaoyu Wang 已提交
616
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
617
      }
618
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
619 620 621 622
      return i;
    }
  }

623
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
624 625 626
  return 0;
}

X
Xiaoyu Wang 已提交
627 628 629
/*
 * Replace one token inside a heap-allocated string with new text.
 *
 * str      - in/out: *str is the string that contains the token. On success
 *            it is freed and *str is set to a newly allocated string.
 * token    - token to replace; token->z must point into *str and token->n is
 *            its length in bytes.
 * newToken - NUL-terminated replacement text.
 *
 * Returns a token whose z points at the replacement inside the new string and
 * whose n is strlen(newToken); the remaining members are zero. If the
 * allocation fails, *str is left untouched and a zeroed token (z == NULL,
 * n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  SToken ntoken = {0};

  char*       src = *str;
  const char* tail = token->z + token->n;  // text that follows the replaced token
  size_t      headLen = (size_t)(token->z - src);
  size_t      newLen = strlen(newToken);
  size_t      tailLen = strlen(tail);

  // calloc zero-fills the buffer, which supplies the terminating NUL
  char* dst = taosMemoryCalloc(1, headLen + newLen + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  memcpy(dst, src, headLen);
  memcpy(dst + headLen, newToken, newLen);
  memcpy(dst + headLen + newLen, tail, tailLen);

  ntoken.n = (uint32_t)newLen;
  ntoken.z = dst + headLen;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

648
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
649
  SToken t0 = {0};
S
slguan 已提交
650

H
hzcheng 已提交
651 652
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
653 654
    t0.n = 0;
    return t0;
H
hzcheng 已提交
655 656
  }

657
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
658 659 660
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
661
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
662
    char    t = str[*i];
H
Haojun Liao 已提交
663 664 665 666
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
667
      }
X
Xiaoyu Wang 已提交
668

H
Haojun Liao 已提交
669
      t = str[++(*i)];
S
slguan 已提交
670
    }
H
hzcheng 已提交
671

672
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
673
    break;
S
slguan 已提交
674

H
Haojun Liao 已提交
675 676
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
677
    bool ignore = false;
S
slguan 已提交
678 679
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
680
        ignore = true;
S
slguan 已提交
681 682 683 684
        break;
      }
    }

H
Haojun Liao 已提交
685
    if (!ignore) {
S
slguan 已提交
686 687
      break;
    }
H
Haojun Liao 已提交
688
#endif
H
hzcheng 已提交
689 690
  }

691
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
692 693 694 695 696 697 698 699 700
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
701
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
702 703

    // only id and string are valid
704
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
705
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
706 707 708 709 710 711 712 713 714
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
715
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
716
      len = tGetToken(&str[*i + t0.n], &type);
717
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
718 719 720
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
721 722 723
    }
  }

X
Xiaoyu Wang 已提交
724
  t0.z = (char*)str + (*i);
S
slguan 已提交
725 726 727
  *i += t0.n;

  return t0;
H
hzcheng 已提交
728 729
}

X
Xiaoyu Wang 已提交
730
/*
 * Tell whether the first len bytes of z are classified as something other
 * than a plain identifier: true whenever tKeywordCode() returns a code
 * different from TK_NK_ID.
 */
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
731 732

/*
 * Release the keyword hash table.
 * The global pointer is swapped to NULL with a compare-exchange first, and
 * the table is cleaned up only when that call hands back the pointer that was
 * read here.
 */
void taosCleanupKeywordsTable() {
  void* table = keywordHashTable;
  if (table == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, table, 0) != table) {
    return;
  }

  taosHashCleanup(table);
}
738

H
Haojun Liao 已提交
739
/*
 * Copy the text of pToken into buf and return a token that refers to the copy.
 *
 * buf receives pToken->n bytes followed by a terminating NUL; the assertion
 * requires len (the capacity of buf) to be larger than pToken->n. The
 * returned token equals *pToken except that z points to buf.
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL && len > pToken->n);

  SToken dup = *pToken;

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;
  dup.z = buf;

  return dup;
}