ttokenizer.c 17.0 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16 17
#include "os.h"

18
#include "hash.h"
H
hjxilinx 已提交
19 20
#include "hashfunc.h"
#include "taosdef.h"
21
#include "ttoken.h"
22 23
#include "ttokendef.h"
#include "tutil.h"
H
hzcheng 已提交
24

S
slguan 已提交
25
// One entry of the SQL keyword list; pointers to these entries are stored in
// a hash table keyed by the keyword name.
typedef struct SKeyword {
  const char *name;  // keyword text as written in keywordTable
  uint16_t    type;  // token type (TK_*) reported for this keyword
  uint8_t     len;   // strlen(name), filled in when the hash table is built
} SKeyword;

S
slguan 已提交
32
// keywords in sql string
H
hzcheng 已提交
33
static SKeyword keywordTable[] = {
S
slguan 已提交
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
    {"ID",           TK_ID},
    {"BOOL",         TK_BOOL},
    {"TINYINT",      TK_TINYINT},
    {"SMALLINT",     TK_SMALLINT},
    {"INTEGER",      TK_INTEGER},
    {"INT",          TK_INTEGER},
    {"BIGINT",       TK_BIGINT},
    {"FLOAT",        TK_FLOAT},
    {"DOUBLE",       TK_DOUBLE},
    {"STRING",       TK_STRING},
    {"TIMESTAMP",    TK_TIMESTAMP},
    {"BINARY",       TK_BINARY},
    {"NCHAR",        TK_NCHAR},
    {"OR",           TK_OR},
    {"AND",          TK_AND},
    {"NOT",          TK_NOT},
    {"EQ",           TK_EQ},
    {"NE",           TK_NE},
    {"ISNULL",       TK_ISNULL},
    {"NOTNULL",      TK_NOTNULL},
    {"IS",           TK_IS},
    {"LIKE",         TK_LIKE},
    {"GLOB",         TK_GLOB},
    {"BETWEEN",      TK_BETWEEN},
    {"IN",           TK_IN},
    {"GT",           TK_GT},
    {"GE",           TK_GE},
    {"LT",           TK_LT},
    {"LE",           TK_LE},
    {"BITAND",       TK_BITAND},
    {"BITOR",        TK_BITOR},
    {"LSHIFT",       TK_LSHIFT},
    {"RSHIFT",       TK_RSHIFT},
    {"PLUS",         TK_PLUS},
    {"MINUS",        TK_MINUS},
    {"DIVIDE",       TK_DIVIDE},
    {"TIMES",        TK_TIMES},
    {"STAR",         TK_STAR},
    {"SLASH",        TK_SLASH},
    {"REM ",         TK_REM},
    {"CONCAT",       TK_CONCAT},
    {"UMINUS",       TK_UMINUS},
    {"UPLUS",        TK_UPLUS},
    {"BITNOT",       TK_BITNOT},
    {"SHOW",         TK_SHOW},
    {"DATABASES",    TK_DATABASES},
    {"MNODES",       TK_MNODES},
    {"DNODES",       TK_DNODES},
    {"ACCOUNTS",     TK_ACCOUNTS},
    {"USERS",        TK_USERS},
    {"MODULES",      TK_MODULES},
    {"QUERIES",      TK_QUERIES},
    {"CONNECTIONS",  TK_CONNECTIONS},
    {"STREAMS",      TK_STREAMS},
H
Haojun Liao 已提交
88
    {"VARIABLES",    TK_VARIABLES},
S
slguan 已提交
89 90 91 92 93 94 95 96 97 98 99 100 101 102
    {"SCORES",       TK_SCORES},
    {"GRANTS",       TK_GRANTS},
    {"DOT",          TK_DOT},
    {"TABLES",       TK_TABLES},
    {"STABLES",      TK_STABLES},
    {"VGROUPS",      TK_VGROUPS},
    {"DROP",         TK_DROP},
    {"TABLE",        TK_TABLE},
    {"DATABASE",     TK_DATABASE},
    {"DNODE",        TK_DNODE},
    {"USER",         TK_USER},
    {"ACCOUNT",      TK_ACCOUNT},
    {"USE",          TK_USE},
    {"DESCRIBE",     TK_DESCRIBE},
103
    {"SYNCDB",       TK_SYNCDB},
S
slguan 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
    {"ALTER",        TK_ALTER},
    {"PASS",         TK_PASS},
    {"PRIVILEGE",    TK_PRIVILEGE},
    {"LOCAL",        TK_LOCAL},
    {"IF",           TK_IF},
    {"EXISTS",       TK_EXISTS},
    {"CREATE",       TK_CREATE},
    {"PPS",          TK_PPS},
    {"TSERIES",      TK_TSERIES},
    {"DBS",          TK_DBS},
    {"STORAGE",      TK_STORAGE},
    {"QTIME",        TK_QTIME},
    {"CONNS",        TK_CONNS},
    {"STATE",        TK_STATE},
    {"KEEP",         TK_KEEP},
    {"REPLICA",      TK_REPLICA},
120
    {"QUORUM",       TK_QUORUM},
S
slguan 已提交
121
    {"DAYS",         TK_DAYS},
H
hjxilinx 已提交
122 123 124
    {"MINROWS",      TK_MINROWS},
    {"MAXROWS",      TK_MAXROWS},
    {"BLOCKS",       TK_BLOCKS},
S
slguan 已提交
125 126
    {"CACHE",        TK_CACHE},
    {"CTIME",        TK_CTIME},
H
hjxilinx 已提交
127
    {"WAL",          TK_WAL},
陶建辉(Jeff)'s avatar
sql.y  
陶建辉(Jeff) 已提交
128
    {"FSYNC",        TK_FSYNC},
S
slguan 已提交
129 130 131 132
    {"COMP",         TK_COMP},
    {"PRECISION",    TK_PRECISION},
    {"LP",           TK_LP},
    {"RP",           TK_RP},
133
    {"UNSIGNED",     TK_UNSIGNED},
S
slguan 已提交
134 135 136 137 138 139 140 141 142
    {"TAGS",         TK_TAGS},
    {"USING",        TK_USING},
    {"AS",           TK_AS},
    {"COMMA",        TK_COMMA},
    {"NULL",         TK_NULL},
    {"SELECT",       TK_SELECT},
    {"FROM",         TK_FROM},
    {"VARIABLE",     TK_VARIABLE},
    {"INTERVAL",     TK_INTERVAL},
143
    {"SESSION",      TK_SESSION},
144
    {"STATE_WINDOW", TK_STATE_WINDOW},
S
slguan 已提交
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
    {"FILL",         TK_FILL},
    {"SLIDING",      TK_SLIDING},
    {"ORDER",        TK_ORDER},
    {"BY",           TK_BY},
    {"ASC",          TK_ASC},
    {"DESC",         TK_DESC},
    {"GROUP",        TK_GROUP},
    {"HAVING",       TK_HAVING},
    {"LIMIT",        TK_LIMIT},
    {"OFFSET",       TK_OFFSET},
    {"SLIMIT",       TK_SLIMIT},
    {"SOFFSET",      TK_SOFFSET},
    {"WHERE",        TK_WHERE},
    {"NOW",          TK_NOW},
    {"INSERT",       TK_INSERT},
    {"INTO",         TK_INTO},
    {"VALUES",       TK_VALUES},
Y
yihaoDeng 已提交
162
    {"UPDATE",       TK_UPDATE},
S
slguan 已提交
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
    {"RESET",        TK_RESET},
    {"QUERY",        TK_QUERY},
    {"ADD",          TK_ADD},
    {"COLUMN",       TK_COLUMN},
    {"TAG",          TK_TAG},
    {"CHANGE",       TK_CHANGE},
    {"SET",          TK_SET},
    {"KILL",         TK_KILL},
    {"CONNECTION",   TK_CONNECTION},
    {"COLON",        TK_COLON},
    {"STREAM",       TK_STREAM},
    {"ABORT",        TK_ABORT},
    {"AFTER",        TK_AFTER},
    {"ATTACH",       TK_ATTACH},
    {"BEFORE",       TK_BEFORE},
    {"BEGIN",        TK_BEGIN},
    {"CASCADE",      TK_CASCADE},
    {"CLUSTER",      TK_CLUSTER},
    {"CONFLICT",     TK_CONFLICT},
    {"COPY",         TK_COPY},
    {"DEFERRED",     TK_DEFERRED},
    {"DELIMITERS",   TK_DELIMITERS},
    {"DETACH",       TK_DETACH},
    {"EACH",         TK_EACH},
    {"END",          TK_END},
    {"EXPLAIN",      TK_EXPLAIN},
    {"FAIL",         TK_FAIL},
    {"FOR",          TK_FOR},
    {"IGNORE",       TK_IGNORE},
    {"IMMEDIATE",    TK_IMMEDIATE},
    {"INITIALLY",    TK_INITIALLY},
    {"INSTEAD",      TK_INSTEAD},
    {"MATCH",        TK_MATCH},
    {"KEY",          TK_KEY},
    {"OF",           TK_OF},
    {"RAISE",        TK_RAISE},
    {"REPLACE",      TK_REPLACE},
    {"RESTRICT",     TK_RESTRICT},
    {"ROW",          TK_ROW},
    {"STATEMENT",    TK_STATEMENT},
    {"TRIGGER",      TK_TRIGGER},
    {"VIEW",         TK_VIEW},
    {"ALL",          TK_ALL},
    {"SEMI",         TK_SEMI},
    {"NONE",         TK_NONE},
    {"PREV",         TK_PREV},
    {"LINEAR",       TK_LINEAR},
    {"IMPORT",       TK_IMPORT},
    {"TBNAME",       TK_TBNAME},
    {"JOIN",         TK_JOIN},
    {"STABLE",       TK_STABLE},
    {"FILE",         TK_FILE},
L
lihui 已提交
215
    {"VNODES",       TK_VNODES},
216
    {"UNION",        TK_UNION},
D
dapan1121 已提交
217
    {"CACHELAST",    TK_CACHELAST},
dengyihao's avatar
TD-2571  
dengyihao 已提交
218
    {"DISTINCT",     TK_DISTINCT},
D
dapan1121 已提交
219 220 221
    {"PARTITIONS",   TK_PARTITIONS},
    {"TOPIC",        TK_TOPIC},
    {"TOPICS",       TK_TOPICS}
H
hzcheng 已提交
222 223 224 225 226 227 228 229 230 231 232 233 234 235
};

/*
 * Lookup table indexed by a 7-bit ASCII code: the entry is 1 when the
 * character may appear in an identifier (digits 0-9, letters A-Z and a-z,
 * and '_') and 0 otherwise.
 *
 * The table has only 128 entries, so a byte with the high bit set (>= 0x80)
 * must be rejected before it is used as an index.
 */
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
236
// Hash table mapping a keyword name to a pointer to its keywordTable entry.
// Created by doInitKeywordsTable() and released (reset to NULL) by
// taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
237

S
TD-1057  
Shengliang Guan 已提交
238
static void doInitKeywordsTable(void) {
239 240
  int numOfEntries = tListLen(keywordTable);
  
H
Haojun Liao 已提交
241
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
242
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
243
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
244
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
245
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
246
  }
247 248 249 250
}

// pthread_once() control used by tKeywordCode() to run doInitKeywordsTable().
static pthread_once_t keywordsHashTableInit = PTHREAD_ONCE_INIT;

251
static int32_t tKeywordCode(const char* z, int n) {
252
  pthread_once(&keywordsHashTableInit, doInitKeywordsTable);
H
hjxilinx 已提交
253 254
  
  char key[512] = {0};
255 256 257
  if (n > tListLen(key)) { // too long token, can not be any other token type
    return TK_ID;
  }
H
hjxilinx 已提交
258
  
H
hzcheng 已提交
259 260
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
261
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
262 263 264 265 266
    } else {
      key[j] = z[j];
    }
  }

H
Haojun Liao 已提交
267
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
H
Haojun Liao 已提交
268
  return (pKey != NULL)? (*pKey)->type:TK_ID;
H
hzcheng 已提交
269 270
}

H
huili 已提交
271
/*
272 273 274
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
275
uint32_t tGetToken(char* z, uint32_t* tokenId) {
276
  uint32_t i;
H
hzcheng 已提交
277 278 279 280 281 282 283 284
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
H
Haojun Liao 已提交
285
      *tokenId = TK_SPACE;
H
hzcheng 已提交
286 287 288
      return i;
    }
    case ':': {
H
Haojun Liao 已提交
289
      *tokenId = TK_COLON;
H
hzcheng 已提交
290 291 292 293 294 295
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
H
Haojun Liao 已提交
296
        *tokenId = TK_COMMENT;
H
hzcheng 已提交
297 298
        return i;
      }
H
Haojun Liao 已提交
299
      *tokenId = TK_MINUS;
H
hzcheng 已提交
300 301 302
      return 1;
    }
    case '(': {
H
Haojun Liao 已提交
303
      *tokenId = TK_LP;
H
hzcheng 已提交
304 305 306
      return 1;
    }
    case ')': {
H
Haojun Liao 已提交
307
      *tokenId = TK_RP;
H
hzcheng 已提交
308 309 310
      return 1;
    }
    case ';': {
H
Haojun Liao 已提交
311
      *tokenId = TK_SEMI;
H
hzcheng 已提交
312 313 314
      return 1;
    }
    case '+': {
H
Haojun Liao 已提交
315
      *tokenId = TK_PLUS;
H
hzcheng 已提交
316 317 318
      return 1;
    }
    case '*': {
H
Haojun Liao 已提交
319
      *tokenId = TK_STAR;
H
hzcheng 已提交
320 321 322 323
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
H
Haojun Liao 已提交
324
        *tokenId = TK_SLASH;
H
hzcheng 已提交
325 326 327 328 329
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
H
Haojun Liao 已提交
330
      *tokenId = TK_COMMENT;
H
hzcheng 已提交
331 332 333
      return i;
    }
    case '%': {
H
Haojun Liao 已提交
334
      *tokenId = TK_REM;
H
hzcheng 已提交
335 336 337
      return 1;
    }
    case '=': {
H
Haojun Liao 已提交
338
      *tokenId = TK_EQ;
H
hzcheng 已提交
339 340 341 342
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
H
Haojun Liao 已提交
343
        *tokenId = TK_LE;
H
hzcheng 已提交
344 345
        return 2;
      } else if (z[1] == '>') {
H
Haojun Liao 已提交
346
        *tokenId = TK_NE;
H
hzcheng 已提交
347 348
        return 2;
      } else if (z[1] == '<') {
H
Haojun Liao 已提交
349
        *tokenId = TK_LSHIFT;
H
hzcheng 已提交
350 351
        return 2;
      } else {
H
Haojun Liao 已提交
352
        *tokenId = TK_LT;
H
hzcheng 已提交
353 354 355 356 357
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
H
Haojun Liao 已提交
358
        *tokenId = TK_GE;
H
hzcheng 已提交
359 360
        return 2;
      } else if (z[1] == '>') {
H
Haojun Liao 已提交
361
        *tokenId = TK_RSHIFT;
H
hzcheng 已提交
362 363
        return 2;
      } else {
H
Haojun Liao 已提交
364
        *tokenId = TK_GT;
H
hzcheng 已提交
365 366 367 368 369
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
H
Haojun Liao 已提交
370
        *tokenId = TK_ILLEGAL;
H
hzcheng 已提交
371 372
        return 2;
      } else {
H
Haojun Liao 已提交
373
        *tokenId = TK_NE;
H
hzcheng 已提交
374 375 376 377 378
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
H
Haojun Liao 已提交
379
        *tokenId = TK_BITOR;
H
hzcheng 已提交
380 381
        return 1;
      } else {
H
Haojun Liao 已提交
382
        *tokenId = TK_CONCAT;
H
hzcheng 已提交
383 384 385 386
        return 2;
      }
    }
    case ',': {
H
Haojun Liao 已提交
387
      *tokenId = TK_COMMA;
H
hzcheng 已提交
388 389 390
      return 1;
    }
    case '&': {
H
Haojun Liao 已提交
391
      *tokenId = TK_BITAND;
H
hzcheng 已提交
392 393 394
      return 1;
    }
    case '~': {
H
Haojun Liao 已提交
395
      *tokenId = TK_BITNOT;
H
hzcheng 已提交
396 397
      return 1;
    }
S
slguan 已提交
398
    case '?': {
H
Haojun Liao 已提交
399
      *tokenId = TK_QUESTION;
S
slguan 已提交
400 401
      return 1;
    }
H
hzcheng 已提交
402 403
    case '\'':
    case '"': {
S
slguan 已提交
404 405
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
406
      for (i = 1; z[i]; i++) {
H
Haojun Liao 已提交
407
        if (z[i] == '\\') {   // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
408 409 410 411
          i++;
          continue;
        }
        
412
        if (z[i] == delim) {
H
hzcheng 已提交
413 414 415
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
416
            strEnd = true;
H
hzcheng 已提交
417 418 419 420
            break;
          }
        }
      }
L
[1292]  
lihui 已提交
421
      
H
hzcheng 已提交
422
      if (z[i]) i++;
H
huili 已提交
423

S
slguan 已提交
424
      if (strEnd) {
H
Haojun Liao 已提交
425
        *tokenId = TK_STRING;
S
slguan 已提交
426 427
        return i;
      }
H
huili 已提交
428

S
slguan 已提交
429
      break;
H
hzcheng 已提交
430 431
    }
    case '.': {
S
slguan 已提交
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

H
Haojun Liao 已提交
449
        *tokenId = TK_FLOAT;
S
slguan 已提交
450 451
        return i;
      } else {
H
Haojun Liao 已提交
452
        *tokenId = TK_DOT;
S
slguan 已提交
453 454 455 456 457 458 459 460
        return 1;
      }
    }

    case '0': {
      char next = z[1];

      if (next == 'b') { // bin number
H
Haojun Liao 已提交
461
        *tokenId = TK_BIN;
S
slguan 已提交
462 463 464 465 466 467 468 469 470
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      } else if (next == 'x') {  //hex number
H
Haojun Liao 已提交
471
        *tokenId = TK_HEX;
S
slguan 已提交
472 473 474 475 476 477 478 479 480
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
481 482 483 484 485 486 487 488 489 490
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
H
Haojun Liao 已提交
491
      *tokenId = TK_INTEGER;
H
hzcheng 已提交
492 493 494
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
495 496 497 498 499
      /* here is the 1u/1a/2s/3m/9y */
      if ((z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' || z[i] == 'n' ||
           z[i] == 'y' || z[i] == 'w' ||
           z[i] == 'U' || z[i] == 'A' || z[i] == 'S' || z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' ||
           z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
500
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
H
Haojun Liao 已提交
501
        *tokenId = TK_VARIABLE;
H
hzcheng 已提交
502 503 504 505 506 507 508 509 510 511
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
H
Haojun Liao 已提交
512
        *tokenId = TK_FLOAT;
H
hzcheng 已提交
513 514 515 516
        seg++;
      }

      if (seg == 4) {  // ip address
H
Haojun Liao 已提交
517
        *tokenId = TK_IPTOKEN;
H
hzcheng 已提交
518 519 520 521 522 523 524 525 526
        return i;
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
H
Haojun Liao 已提交
527
        *tokenId = TK_FLOAT;
H
hzcheng 已提交
528 529 530 531 532 533
      }
      return i;
    }
    case '[': {
      for (i = 1; z[i] && z[i - 1] != ']'; i++) {
      }
H
Haojun Liao 已提交
534
      *tokenId = TK_ID;
H
hzcheng 已提交
535 536 537 538 539 540
      return i;
    }
    case 'T':
    case 't':
    case 'F':
    case 'f': {
H
hjxilinx 已提交
541
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
542 543 544
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
H
Haojun Liao 已提交
545
        *tokenId = TK_BOOL;
H
hzcheng 已提交
546 547 548 549
        return i;
      }
    }
    default: {
H
hjxilinx 已提交
550
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t) *z]) {
H
hzcheng 已提交
551 552
        break;
      }
H
hjxilinx 已提交
553
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t) z[i]]; i++) {
H
hzcheng 已提交
554
      }
555
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
556 557 558 559
      return i;
    }
  }

H
Haojun Liao 已提交
560
  *tokenId = TK_ILLEGAL;
H
hzcheng 已提交
561 562 563
  return 0;
}

H
Haojun Liao 已提交
564
/*
 * Replace one token inside the heap-allocated string *str with newToken.
 *
 * token->z must point into *str and token->n is the number of bytes to
 * replace. A new buffer holding <text before token><newToken><text after
 * token> is allocated; on success the old buffer is released with tfree(),
 * *str is set to the new buffer, and the returned token has z pointing at
 * the copy of newToken inside it and n == strlen(newToken). All other fields
 * of the returned token are zero.
 *
 * If the allocation fails, *str is left unchanged and a zeroed token
 * (z == NULL, n == 0) is returned.
 */
SStrToken tscReplaceStrToken(char **str, SStrToken *token, const char* newToken) {
  char *src = *str;
  SStrToken ntoken = {0};

  size_t nsize = strlen(newToken);
  size_t bsize = (size_t)(token->z - src);              // bytes in front of the token
  const char *tail = token->z + token->n;               // first byte after the token
  size_t tsize = strlen(src) - bsize - token->n;        // bytes after the token

  // calloc zero-fills the buffer, so the extra byte is the NUL terminator
  char *dst = calloc(1, bsize + nsize + tsize + 1);
  if (dst == NULL) {
    return ntoken;
  }

  memcpy(dst, src, bsize);
  memcpy(dst + bsize, newToken, nsize);
  memcpy(dst + bsize + nsize, tail, tsize);

  ntoken.n = (uint32_t)nsize;
  ntoken.z = dst + bsize;

  *str = dst;
  tfree(src);

  return ntoken;
}

H
Haojun Liao 已提交
585
SStrToken tStrGetToken(char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
586
  SStrToken t0 = {0};
S
slguan 已提交
587

H
hzcheng 已提交
588 589
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
590 591
    t0.n = 0;
    return t0;
H
hzcheng 已提交
592 593
  }

S
slguan 已提交
594 595 596 597
  // IGNORE TK_SPACE, TK_COMMA, and specified tokens
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
598 599 600 601 602 603
    int32_t numOfComma = 0;
    char t = str[*i];
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
604
      }
H
Haojun Liao 已提交
605
    
H
Haojun Liao 已提交
606
      t = str[++(*i)];
S
slguan 已提交
607
    }
H
hzcheng 已提交
608

609
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
610
    break;
S
slguan 已提交
611

H
Haojun Liao 已提交
612 613
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
614
    bool ignore = false;
S
slguan 已提交
615 616
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
617
        ignore = true;
S
slguan 已提交
618 619 620 621
        break;
      }
    }

H
Haojun Liao 已提交
622
    if (!ignore) {
S
slguan 已提交
623 624
      break;
    }
H
Haojun Liao 已提交
625
#endif
H
hzcheng 已提交
626 627
  }

S
slguan 已提交
628 629 630 631 632 633 634 635 636 637
  if (t0.type == TK_SEMI) {
    t0.n = 0;
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
638
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
639 640 641 642 643 644 645 646 647 648 649 650 651 652

    // only id and string are valid
    if ((TK_STRING != t0.type) && (TK_ID != t0.type)) {
      t0.type = TK_ILLEGAL;
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
    if ((isPrevOptr) && (t0.type == TK_MINUS || t0.type == TK_PLUS)) {
653
      len = tGetToken(&str[*i + t0.n], &type);
S
slguan 已提交
654 655 656 657
      if (type == TK_INTEGER || type == TK_FLOAT) {
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
658 659 660
    }
  }

S
slguan 已提交
661 662 663 664
  t0.z = str + (*i);
  *i += t0.n;

  return t0;
H
hzcheng 已提交
665 666
}

667 668 669
// Return true when the len-byte token at z maps to a keyword token type,
// i.e. tKeywordCode() reports anything other than TK_ID.
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode(z, len);
  return code != TK_ID;
}
H
Haojun Liao 已提交
670 671

// Release the keyword hash table, if one exists, and reset keywordHashTable.
void taosCleanupKeywordsTable() {
  void* table = keywordHashTable;
  if (table == NULL) {
    return;
  }

  // NOTE(review): assumes atomic_val_compare_exchange_ptr() returns the value
  // previously stored in keywordHashTable, so the table is cleaned up only by
  // the caller whose exchange replaced `table` with 0 - confirm.
  if (atomic_val_compare_exchange_ptr(&keywordHashTable, table, 0) == table) {
    taosHashCleanup(table);
  }
}