parTokenizer.c 21.3 KB
Newer Older
H
hzcheng 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

H
hjxilinx 已提交
16
#include "os.h"
X
Xiaoyu Wang 已提交
17
#include "parToken.h"
H
hjxilinx 已提交
18
#include "taosdef.h"
X
Xiaoyu Wang 已提交
19
#include "thash.h"
20
#include "ttokendef.h"
H
hzcheng 已提交
21

S
slguan 已提交
22
// All the keywords of the SQL language are stored in a hash table
H
hzcheng 已提交
23
/*
 * One row of the SQL keyword table.
 *
 * Field order matters: keywordTable rows are written as positional
 * initializers of the form {name, type}.
 */
typedef struct SKeyword {
  const char* name;  // keyword spelling, upper case, NUL-terminated
  uint16_t    type;  // token type (TK_* value) reported for this keyword
  uint8_t     len;   // strlen(name); filled in by doInitKeywordsTable()
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
    {"ACCOUNT",        TK_ACCOUNT},
    {"ACCOUNTS",       TK_ACCOUNTS},
    {"ADD",            TK_ADD},
    {"AGGREGATE",      TK_AGGREGATE},
    {"ALL",            TK_ALL},
    {"ALTER",          TK_ALTER},
    {"ANALYZE",        TK_ANALYZE},
    {"AND",            TK_AND},
    {"APPS",           TK_APPS},
    {"AS",             TK_AS},
    {"ASC",            TK_ASC},
    {"AT_ONCE",        TK_AT_ONCE},
    {"BALANCE",        TK_BALANCE},
    {"BETWEEN",        TK_BETWEEN},
    {"BINARY",         TK_BINARY},
    {"BIGINT",         TK_BIGINT},
    {"BNODE",          TK_BNODE},
    {"BNODES",         TK_BNODES},
    {"BOOL",           TK_BOOL},
    {"BUFFER",         TK_BUFFER},
    {"BUFSIZE",        TK_BUFSIZE},
    {"BY",             TK_BY},
    {"CACHE",          TK_CACHE},
    {"CACHELAST",      TK_CACHELAST},
    {"CAST",           TK_CAST},
    {"CLIENT_VERSION", TK_CLIENT_VERSION},
    {"CLUSTER",        TK_CLUSTER},
    {"COLUMN",         TK_COLUMN},
    {"COMMENT",        TK_COMMENT},
    {"COMP",           TK_COMP},
    {"COMPACT",        TK_COMPACT},
    {"CONNS",          TK_CONNS},
    {"CONNECTION",     TK_CONNECTION},
    {"CONNECTIONS",    TK_CONNECTIONS},
    {"CONSUMER",       TK_CONSUMER},
    {"CONSUMERS",      TK_CONSUMERS},
    {"CONTAINS",       TK_CONTAINS},
    {"COUNT",          TK_COUNT},
    {"CREATE",         TK_CREATE},
    {"CURRENT_USER",   TK_CURRENT_USER},
    {"DATABASE",       TK_DATABASE},
    {"DATABASES",      TK_DATABASES},
    {"DBS",            TK_DBS},
    {"DELETE",         TK_DELETE},
    {"DESC",           TK_DESC},
    {"DESCRIBE",       TK_DESCRIBE},
    {"DISTINCT",       TK_DISTINCT},
    {"DISTRIBUTED",    TK_DISTRIBUTED},
    {"DNODE",          TK_DNODE},
    {"DNODES",         TK_DNODES},
    {"DOUBLE",         TK_DOUBLE},
    {"DROP",           TK_DROP},
    {"DURATION",       TK_DURATION},
    {"ENABLE",         TK_ENABLE},
    {"EXISTS",         TK_EXISTS},
87
    {"EXPIRED",        TK_EXPIRED},
88 89
    {"EXPLAIN",        TK_EXPLAIN},
    {"EVERY",          TK_EVERY},
X
Xiaoyu Wang 已提交
90
    {"FILE",           TK_FILE},
91 92 93 94 95 96 97 98 99 100 101 102
    {"FILL",           TK_FILL},
    {"FIRST",          TK_FIRST},
    {"FLOAT",          TK_FLOAT},
    {"FROM",           TK_FROM},
    {"FSYNC",          TK_FSYNC},
    {"FUNCTION",       TK_FUNCTION},
    {"FUNCTIONS",      TK_FUNCTIONS},
    {"GRANT",          TK_GRANT},
    {"GRANTS",         TK_GRANTS},
    {"GROUP",          TK_GROUP},
    {"HAVING",         TK_HAVING},
    {"IF",             TK_IF},
103
    {"IGNORE",         TK_IGNORE},
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
    {"IMPORT",         TK_IMPORT},
    {"IN",             TK_IN},
    {"INDEX",          TK_INDEX},
    {"INDEXES",        TK_INDEXES},
    {"INNER",          TK_INNER},
    {"INT",            TK_INT},
    {"INSERT",         TK_INSERT},
    {"INTEGER",        TK_INTEGER},
    {"INTERVAL",       TK_INTERVAL},
    {"INTO",           TK_INTO},
    {"IS",             TK_IS},
    {"JOIN",           TK_JOIN},
    {"JSON",           TK_JSON},
    {"KEEP",           TK_KEEP},
    {"KILL",           TK_KILL},
    {"LAST",           TK_LAST},
    {"LAST_ROW",       TK_LAST_ROW},
    {"LICENCE",        TK_LICENCE},
    {"LIKE",           TK_LIKE},
    {"LIMIT",          TK_LIMIT},
    {"LINEAR",         TK_LINEAR},
    {"LOCAL",          TK_LOCAL},
    {"MATCH",          TK_MATCH},
    {"MAXROWS",        TK_MAXROWS},
    {"MAX_DELAY",      TK_MAX_DELAY},
    {"MERGE",          TK_MERGE},
    {"META",           TK_META},
    {"MINROWS",        TK_MINROWS},
    {"MINUS",          TK_MINUS},
    {"MNODE",          TK_MNODE},
    {"MNODES",         TK_MNODES},
    {"MODIFY",         TK_MODIFY},
    {"MODULES",        TK_MODULES},
    {"NCHAR",          TK_NCHAR},
    {"NEXT",           TK_NEXT},
    {"NMATCH",         TK_NMATCH},
    {"NONE",           TK_NONE},
    {"NOT",            TK_NOT},
    {"NOW",            TK_NOW},
    {"NULL",           TK_NULL},
    {"NULLS",          TK_NULLS},
    {"OFFSET",         TK_OFFSET},
    {"ON",             TK_ON},
    {"OR",             TK_OR},
    {"ORDER",          TK_ORDER},
    {"OUTPUTTYPE",     TK_OUTPUTTYPE},
    {"PARTITION",      TK_PARTITION},
    {"PASS",           TK_PASS},
    {"PAGES",          TK_PAGES},
    {"PAGESIZE",       TK_PAGESIZE},
    {"PORT",           TK_PORT},
    {"PPS",            TK_PPS},
    {"PRECISION",      TK_PRECISION},
    // {"PRIVILEGE",      TK_PRIVILEGE},
    {"PREV",           TK_PREV},
    {"QNODE",          TK_QNODE},
    {"QNODES",         TK_QNODES},
    {"QTIME",          TK_QTIME},
    {"QUERIES",        TK_QUERIES},
    {"QUERY",          TK_QUERY},
    {"RANGE",          TK_RANGE},
    {"RATIO",          TK_RATIO},
    {"READ",           TK_READ},
    {"REDISTRIBUTE",   TK_REDISTRIBUTE},
    {"RENAME",         TK_RENAME},
    {"REPLICA",        TK_REPLICA},
    {"RESET",          TK_RESET},
    {"RETENTIONS",     TK_RETENTIONS},
    {"REVOKE",         TK_REVOKE},
    {"ROLLUP",         TK_ROLLUP},
    {"SCHEMALESS",     TK_SCHEMALESS},
    {"SCORES",         TK_SCORES},
    {"SELECT",         TK_SELECT},
    {"SERVER_STATUS",  TK_SERVER_STATUS},
    {"SERVER_VERSION", TK_SERVER_VERSION},
    {"SESSION",        TK_SESSION},
    {"SET",            TK_SET},
    {"SHOW",           TK_SHOW},
    {"SINGLE_STABLE",  TK_SINGLE_STABLE},
    {"SLIDING",        TK_SLIDING},
    {"SLIMIT",         TK_SLIMIT},
    {"SMA",            TK_SMA},
    {"SMALLINT",       TK_SMALLINT},
    {"SNODE",          TK_SNODE},
    {"SNODES",         TK_SNODES},
    {"SOFFSET",        TK_SOFFSET},
    {"SPLIT",          TK_SPLIT},
    {"STABLE",         TK_STABLE},
    {"STABLES",        TK_STABLES},
    {"STATE",          TK_STATE},
    {"STATE_WINDOW",   TK_STATE_WINDOW},
    {"STORAGE",        TK_STORAGE},
    {"STREAM",         TK_STREAM},
    {"STREAMS",        TK_STREAMS},
    {"STRICT",         TK_STRICT},
    {"SUBSCRIPTIONS",  TK_SUBSCRIPTIONS},
    {"SYNCDB",         TK_SYNCDB},
    {"SYSINFO",        TK_SYSINFO},
    {"TABLE",          TK_TABLE},
    {"TABLES",         TK_TABLES},
    {"TAG",            TK_TAG},
    {"TAGS",           TK_TAGS},
    {"TBNAME",         TK_TBNAME},
    {"TIMESTAMP",      TK_TIMESTAMP},
    {"TIMEZONE",       TK_TIMEZONE},
    {"TINYINT",        TK_TINYINT},
    {"TO",             TK_TO},
    {"TODAY",          TK_TODAY},
    {"TOPIC",          TK_TOPIC},
    {"TOPICS",         TK_TOPICS},
    {"TRANSACTION",    TK_TRANSACTION},
    {"TRANSACTIONS",   TK_TRANSACTIONS},
    {"TRIGGER",        TK_TRIGGER},
    {"TSERIES",        TK_TSERIES},
    {"TTL",            TK_TTL},
    {"UNION",          TK_UNION},
    {"UNSIGNED",       TK_UNSIGNED},
    {"USE",            TK_USE},
    {"USER",           TK_USER},
    {"USERS",          TK_USERS},
    {"USING",          TK_USING},
    {"VALUE",          TK_VALUE},
    {"VALUES",         TK_VALUES},
    {"VARCHAR",        TK_VARCHAR},
    {"VARIABLES",      TK_VARIABLES},
    {"VERBOSE",        TK_VERBOSE},
    {"VGROUP",         TK_VGROUP},
    {"VGROUPS",        TK_VGROUPS},
    {"VNODES",         TK_VNODES},
    {"WAL",            TK_WAL},
    {"WATERMARK",      TK_WATERMARK},
    {"WHERE",          TK_WHERE},
    {"WINDOW_CLOSE",   TK_WINDOW_CLOSE},
    {"WITH",           TK_WITH},
    {"WRITE",          TK_WRITE},
    {"_C0",            TK_ROWTS},
    {"_QENDTS",        TK_QENDTS},
    {"_QSTARTTS",      TK_QSTARTTS},
    {"_ROWTS",         TK_ROWTS},
    {"_WDURATION",     TK_WDURATION},
    {"_WENDTS",        TK_WENDTS},
    {"_WSTARTTS",      TK_WSTARTTS},
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
307
    // {"PARTITIONS",   TK_PARTITIONS},
308
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
309
};
X
Xiaoyu Wang 已提交
310
// clang-format on
H
hzcheng 已提交
311 312 313 314 315 316 317 318 319 320 321 322 323

/*
 * Lookup table: isIdChar[c] is 1 when byte c may appear inside an identifier
 * (ASCII digits, ASCII letters and '_'), 0 otherwise.
 *
 * The table is sized for every possible byte value so that indexing it with a
 * (uint8_t) cast is always in bounds; entries 0x80..0xFF are not listed and
 * are therefore zero-initialized, i.e. never identifier characters.
 */
static const char isIdChar[256] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
324
// Hash table mapping keyword text to a pointer to its keywordTable row.
// Created by doInitKeywordsTable(), queried by tKeywordCode(), and released
// (and reset to NULL) by taosCleanupKeywordsTable().
static void* keywordHashTable = NULL;
H
hzcheng 已提交
325

S
TD-1057  
Shengliang Guan 已提交
326
static void doInitKeywordsTable(void) {
327
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
328

H
Haojun Liao 已提交
329
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
330
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
331
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
332
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
333
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
334
  }
335 336
}

wafwerar's avatar
wafwerar 已提交
337
// Once-control passed to taosThreadOnce() in tKeywordCode() together with doInitKeywordsTable().
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
338

339
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
340
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
341

H
hjxilinx 已提交
342
  char key[512] = {0};
X
Xiaoyu Wang 已提交
343
  if (n > tListLen(key)) {  // too long token, can not be any other token type
344
    return TK_NK_ID;
345
  }
X
Xiaoyu Wang 已提交
346

H
hzcheng 已提交
347 348
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
349
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
350 351 352 353 354
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
355
  if (keywordHashTable == NULL) {
356
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
357
  }
H
Haojun Liao 已提交
358

H
Haojun Liao 已提交
359
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
360
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
361 362
}

H
huili 已提交
363
/*
364 365 366
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
367
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
368
  uint32_t i;
H
hzcheng 已提交
369 370 371 372 373 374 375 376
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
377
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
378 379 380
      return i;
    }
    case ':': {
381
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
382 383 384 385 386 387
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
388
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
389
        return i;
390 391 392
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
393
      }
X
Xiaoyu Wang 已提交
394
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
395 396 397
      return 1;
    }
    case '(': {
398
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
399 400 401
      return 1;
    }
    case ')': {
402
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
403 404 405
      return 1;
    }
    case ';': {
406
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
407 408 409
      return 1;
    }
    case '+': {
410
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
411 412 413
      return 1;
    }
    case '*': {
414
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
415 416 417 418
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
419
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
420 421 422 423 424
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
425
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
426 427 428
      return i;
    }
    case '%': {
429
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
430 431 432
      return 1;
    }
    case '=': {
433
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
434 435 436 437
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
438
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
439 440
        return 2;
      } else if (z[1] == '>') {
441
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
442 443
        return 2;
      } else if (z[1] == '<') {
444
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
445 446
        return 2;
      } else {
447
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
448 449 450 451 452
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
453
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
454 455
        return 2;
      } else if (z[1] == '>') {
456
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
457 458
        return 2;
      } else {
459
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
460 461 462 463 464
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
465
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
466 467
        return 2;
      } else {
468
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
469 470 471 472 473
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
474
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
475 476
        return 1;
      } else {
477
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
478 479 480 481
        return 2;
      }
    }
    case ',': {
482
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
483 484 485
      return 1;
    }
    case '&': {
486
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
487 488 489
      return 1;
    }
    case '~': {
490
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
491 492
      return 1;
    }
S
slguan 已提交
493
    case '?': {
494
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
495 496
      return 1;
    }
497
    case '`':
H
hzcheng 已提交
498 499
    case '\'':
    case '"': {
S
slguan 已提交
500 501
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
502
      for (i = 1; z[i]; i++) {
503
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
504 505 506
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
507

508
        if (z[i] == delim) {
H
hzcheng 已提交
509 510 511
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
512
            strEnd = true;
H
hzcheng 已提交
513 514 515 516
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
517

H
hzcheng 已提交
518
      if (z[i]) i++;
H
huili 已提交
519

S
slguan 已提交
520
      if (strEnd) {
X
Xiaoyu Wang 已提交
521
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
522 523
        return i;
      }
H
huili 已提交
524

S
slguan 已提交
525
      break;
H
hzcheng 已提交
526 527
    }
    case '.': {
S
slguan 已提交
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

545
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
546 547
        return i;
      } else {
548
        *tokenId = TK_NK_DOT;
S
slguan 已提交
549 550 551 552 553 554 555
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
556
      if (next == 'b') {  // bin number
557
        *tokenId = TK_NK_BIN;
S
slguan 已提交
558 559 560 561 562 563 564 565
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
566
      } else if (next == 'x') {  // hex number
567
        *tokenId = TK_NK_HEX;
S
slguan 已提交
568 569 570 571 572 573 574 575 576
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
577 578 579 580 581 582 583 584 585 586
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
587
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
588 589 590
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
591
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
592 593 594
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
595
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
596
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
597 598 599 600 601 602 603 604 605 606
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
607
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
608 609 610 611
        seg++;
      }

      if (seg == 4) {  // ip address
612
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
613
        return i;
X
Xiaoyu Wang 已提交
614 615
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
616 617 618 619 620 621 622 623
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
624
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
625 626 627
      }
      return i;
    }
X
Xiaoyu Wang 已提交
628 629 630 631 632 633
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
634 635 636 637
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
638
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
639 640 641
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
642
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
643 644 645 646
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
647
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
648 649
        break;
      }
X
Xiaoyu Wang 已提交
650
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
651
      }
652
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
653 654 655 656
      return i;
    }
  }

657
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
658 659 660
  return 0;
}

X
Xiaoyu Wang 已提交
661 662 663
/*
 * Build a new string in which the text of `token` inside `*str` is replaced
 * by `newToken`.
 *
 * str       in/out: heap string that contains the token. On success it is
 *           freed and *str is set to the newly allocated string.
 * token     token to replace; token->z must point inside *str.
 * newToken  NUL-terminated replacement text.
 *
 * Returns a token describing the replacement text inside the new string
 * (z and n set, type zero). If the allocation fails, *str is left untouched
 * and a zeroed token (z == NULL, n == 0) is returned.
 */
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  char*       src = *str;
  const char* tail = token->z + token->n;       // text that follows the replaced token
  size_t      headLen = (size_t)(token->z - src);  // text that precedes the replaced token
  size_t      newLen = strlen(newToken);
  size_t      tailLen = strlen(tail);
  SToken      ntoken = {0};

  char* dst = taosMemoryCalloc(1, headLen + newLen + tailLen + 1);
  if (dst == NULL) {
    return ntoken;
  }

  // The buffer is zero-filled, so the final byte already acts as the NUL terminator.
  memcpy(dst, src, headLen);
  memcpy(dst + headLen, newToken, newLen);
  memcpy(dst + headLen + newLen, tail, tailLen);

  ntoken.n = (uint32_t)newLen;
  ntoken.z = dst + headLen;

  *str = dst;
  taosMemoryFreeClear(src);

  return ntoken;
}

682
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
683
  SToken t0 = {0};
S
slguan 已提交
684

H
hzcheng 已提交
685 686
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
687 688
    t0.n = 0;
    return t0;
H
hzcheng 已提交
689 690
  }

691
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
692 693 694
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
695
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
696
    char    t = str[*i];
H
Haojun Liao 已提交
697 698 699 700
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
701
      }
X
Xiaoyu Wang 已提交
702

H
Haojun Liao 已提交
703
      t = str[++(*i)];
S
slguan 已提交
704
    }
H
hzcheng 已提交
705

706
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
707
    break;
S
slguan 已提交
708

H
Haojun Liao 已提交
709 710
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
711
    bool ignore = false;
S
slguan 已提交
712 713
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
714
        ignore = true;
S
slguan 已提交
715 716 717 718
        break;
      }
    }

H
Haojun Liao 已提交
719
    if (!ignore) {
S
slguan 已提交
720 721
      break;
    }
H
Haojun Liao 已提交
722
#endif
H
hzcheng 已提交
723 724
  }

725
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
726
    t0.n = 0;
D
dapan1121 已提交
727
    t0.type = 0;
S
slguan 已提交
728 729 730 731 732 733 734 735
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
736
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
737 738

    // only id and string are valid
739
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
740
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
741 742 743 744 745 746 747 748 749
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
750
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
751
      len = tGetToken(&str[*i + t0.n], &type);
752
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
753 754 755
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
756 757 758
    }
  }

X
Xiaoyu Wang 已提交
759
  t0.z = (char*)str + (*i);
S
slguan 已提交
760 761 762
  *i += t0.n;

  return t0;
H
hzcheng 已提交
763 764
}

X
Xiaoyu Wang 已提交
765
// Returns true whenever tKeywordCode() classifies the len-byte text at z as anything other than TK_NK_ID.
bool taosIsKeyWordToken(const char* z, int32_t len) {
  int32_t code = tKeywordCode((char*)z, len);
  return code != TK_NK_ID;
}
H
Haojun Liao 已提交
766 767

/*
 * Release the keyword hash table, if there is one.
 *
 * The global pointer is swapped to NULL with a compare-and-exchange, and the
 * table is destroyed only by the caller whose exchange succeeded.
 */
void taosCleanupKeywordsTable() {
  void* table = keywordHashTable;
  if (table == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, table, 0) == table) {
    taosHashCleanup(table);
  }
}
773

H
Haojun Liao 已提交
774
/*
 * Copy the text of *pToken into buf as a NUL-terminated string.
 *
 * buf must have room for more than pToken->n bytes (len is its capacity).
 * Returns a copy of *pToken whose z points at buf; the other fields are
 * unchanged.
 */
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
  assert(pToken != NULL && buf != NULL && len > pToken->n);

  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;  // strncpy does not terminate when it copies exactly n bytes

  SToken dup = *pToken;
  dup.z = buf;
  return dup;
}