/*
 * Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
 *
 * This program is free software: you can use, redistribute, and/or modify
 * it under the terms of the GNU Affero General Public License, version 3
 * or later ("AGPL"), as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "os.h"
#include "parToken.h"
#include "taosdef.h"
#include "thash.h"
#include "ttokendef.h"

// All the keywords of the SQL language are stored in a hash table
/*
 * One entry of the SQL keyword table.
 *
 * The field order matters: keywordTable uses positional initialisers of the
 * form {name, type} and leaves len zero.
 */
typedef struct SKeyword {
  const char* name;  // keyword text, used as the hash key
  uint16_t    type;  // token id (TK_*) reported for this keyword
  uint8_t     len;   // strlen(name); filled in by doInitKeywordsTable()
} SKeyword;

X
Xiaoyu Wang 已提交
29
// clang-format off
S
slguan 已提交
30
// keywords in sql string
H
hzcheng 已提交
31
static SKeyword keywordTable[] = {
X
Xiaoyu Wang 已提交
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
    {"ACCOUNT",       TK_ACCOUNT},
    {"ACCOUNTS",      TK_ACCOUNTS},
    {"ADD",           TK_ADD},
    {"AGGREGATE",     TK_AGGREGATE},
    {"ALL",           TK_ALL},
    {"ALTER",         TK_ALTER},
    {"ANALYZE",       TK_ANALYZE},
    {"AND",           TK_AND},
    {"APPS",          TK_APPS},
    {"AS",            TK_AS},
    {"ASC",           TK_ASC},
    {"AT_ONCE",       TK_AT_ONCE},
    {"BETWEEN",       TK_BETWEEN},
    {"BINARY",        TK_BINARY},
    {"BIGINT",        TK_BIGINT},
    {"BNODE",         TK_BNODE},
    {"BNODES",        TK_BNODES},
    {"BOOL",          TK_BOOL},
    {"BUFFER",        TK_BUFFER},
    {"BUFSIZE",       TK_BUFSIZE},
    {"BY",            TK_BY},
    {"CACHE",         TK_CACHE},
    {"CACHELAST",     TK_CACHELAST},
    {"CAST",          TK_CAST},
    {"CLUSTER",       TK_CLUSTER},
    {"COLUMN",        TK_COLUMN},
    {"COMMENT",       TK_COMMENT},
    {"COMP",          TK_COMP},
    {"COMPACT",       TK_COMPACT},
    {"CONNS",         TK_CONNS},
    {"CONNECTION",    TK_CONNECTION},
    {"CONNECTIONS",   TK_CONNECTIONS},
64
    {"CONSUMER",      TK_CONSUMER},
X
Xiaoyu Wang 已提交
65 66
    {"COUNT",         TK_COUNT},
    {"CREATE",        TK_CREATE},
dengyihao's avatar
dengyihao 已提交
67
    {"CONTAINS",      TK_CONTAINS}, 
X
Xiaoyu Wang 已提交
68 69 70 71
    {"DATABASE",      TK_DATABASE},
    {"DATABASES",     TK_DATABASES},
    {"DAYS",          TK_DAYS},
    {"DBS",           TK_DBS},
X
Xiaoyu Wang 已提交
72
    {"DELETE",        TK_DELETE},
X
Xiaoyu Wang 已提交
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
    {"DESC",          TK_DESC},
    {"DESCRIBE",      TK_DESCRIBE},
    {"DISTINCT",      TK_DISTINCT},
    {"DNODE",         TK_DNODE},
    {"DNODES",        TK_DNODES},
    {"DOUBLE",        TK_DOUBLE},
    {"DROP",          TK_DROP},
    {"EXISTS",        TK_EXISTS},
    {"EXPLAIN",       TK_EXPLAIN},
    {"FILE_FACTOR",   TK_FILE_FACTOR},
    {"FILL",          TK_FILL},
    {"FIRST",         TK_FIRST},
    {"FLOAT",         TK_FLOAT},
    {"FROM",          TK_FROM},
    {"FSYNC",         TK_FSYNC},
    {"FUNCTION",      TK_FUNCTION},
    {"FUNCTIONS",     TK_FUNCTIONS},
90
    {"GRANT",         TK_GRANT},
X
Xiaoyu Wang 已提交
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
    {"GRANTS",        TK_GRANTS},
    {"GROUP",         TK_GROUP},
    {"HAVING",        TK_HAVING},
    {"IF",            TK_IF},
    {"IMPORT",        TK_IMPORT},
    {"IN",            TK_IN},
    {"INDEX",         TK_INDEX},
    {"INDEXES",       TK_INDEXES},
    {"INNER",         TK_INNER},
    {"INT",           TK_INT},
    {"INSERT",        TK_INSERT},
    {"INTEGER",       TK_INTEGER},
    {"INTERVAL",      TK_INTERVAL},
    {"INTO",          TK_INTO},
    {"IS",            TK_IS},
    {"JOIN",          TK_JOIN},
    {"JSON",          TK_JSON},
    {"KEEP",          TK_KEEP},
    {"KILL",          TK_KILL},
    {"LAST",          TK_LAST},
    {"LAST_ROW",      TK_LAST_ROW},
    {"LICENCE",       TK_LICENCE},
    {"LIKE",          TK_LIKE},
    {"LIMIT",         TK_LIMIT},
    {"LINEAR",        TK_LINEAR},
    {"LOCAL",         TK_LOCAL},
    {"MATCH",         TK_MATCH},
    {"MAXROWS",       TK_MAXROWS},
    {"MINROWS",       TK_MINROWS},
    {"MINUS",         TK_MINUS},
    {"MNODE",         TK_MNODE},
    {"MNODES",        TK_MNODES},
    {"MODIFY",        TK_MODIFY},
    {"MODULES",       TK_MODULES},
    {"NCHAR",         TK_NCHAR},
    {"NEXT",          TK_NEXT},
    {"NMATCH",        TK_NMATCH},
    {"NONE",          TK_NONE},
    {"NOT",           TK_NOT},
    {"NOW",           TK_NOW},
    {"NULL",          TK_NULL},
    {"NULLS",         TK_NULLS},
    {"OFFSET",        TK_OFFSET},
    {"ON",            TK_ON},
    {"OR",            TK_OR},
    {"ORDER",         TK_ORDER},
    {"OUTPUTTYPE",    TK_OUTPUTTYPE},
    {"PARTITION",     TK_PARTITION},
    {"PASS",          TK_PASS},
    {"PAGES",         TK_PAGES},
    {"PAGESIZE",      TK_PAGESIZE},
    {"PORT",          TK_PORT},
    {"PPS",           TK_PPS},
    {"PRECISION",     TK_PRECISION},
    {"PRIVILEGE",     TK_PRIVILEGE},
    {"PREV",          TK_PREV},
    {"QNODE",         TK_QNODE},
    {"QNODES",        TK_QNODES},
    {"QTIME",         TK_QTIME},
    {"QUERIES",       TK_QUERIES},
    {"QUERY",         TK_QUERY},
    {"RATIO",         TK_RATIO},
153 154
    {"READ",          TK_READ},
    {"RENAME",        TK_RENAME},
X
Xiaoyu Wang 已提交
155 156 157
    {"REPLICA",       TK_REPLICA},
    {"RESET",         TK_RESET},
    {"RETENTIONS",    TK_RETENTIONS},
158
    {"REVOKE",        TK_REVOKE},
X
Xiaoyu Wang 已提交
159
    {"ROLLUP",        TK_ROLLUP},
X
Xiaoyu Wang 已提交
160
    {"SCHEMALESS",    TK_SCHEMALESS},
X
Xiaoyu Wang 已提交
161 162 163 164 165
    {"SCORES",        TK_SCORES},
    {"SELECT",        TK_SELECT},
    {"SESSION",       TK_SESSION},
    {"SET",           TK_SET},
    {"SHOW",          TK_SHOW},
166
    {"SINGLE_STABLE", TK_SINGLE_STABLE},
X
Xiaoyu Wang 已提交
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
    {"SLIDING",       TK_SLIDING},
    {"SLIMIT",        TK_SLIMIT},
    {"SMA",           TK_SMA},
    {"SMALLINT",      TK_SMALLINT},
    {"SNODE",         TK_SNODE},
    {"SNODES",        TK_SNODES},
    {"SOFFSET",       TK_SOFFSET},
    {"STABLE",        TK_STABLE},
    {"STABLES",       TK_STABLES},
    {"STATE",         TK_STATE},
    {"STATE_WINDOW",  TK_STATE_WINDOW},
    {"STORAGE",       TK_STORAGE},
    {"STREAM",        TK_STREAM},
    {"STREAMS",       TK_STREAMS},
    {"STRICT",        TK_STRICT},
    {"SYNCDB",        TK_SYNCDB},
    {"TABLE",         TK_TABLE},
    {"TABLES",        TK_TABLES},
    {"TAG",           TK_TAG},
    {"TAGS",          TK_TAGS},
    {"TBNAME",        TK_TBNAME},
    {"TIMESTAMP",     TK_TIMESTAMP},
    {"TIMEZONE",      TK_TIMEZONE},
    {"TINYINT",       TK_TINYINT},
191
    {"TO",            TK_TO},
X
Xiaoyu Wang 已提交
192 193 194
    {"TODAY",         TK_TODAY},
    {"TOPIC",         TK_TOPIC},
    {"TOPICS",        TK_TOPICS},
195 196
    {"TRANSACTION",   TK_TRANSACTION},
    {"TRANSACTIONS",  TK_TRANSACTIONS},
X
Xiaoyu Wang 已提交
197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216
    {"TRIGGER",       TK_TRIGGER},
    {"TSERIES",       TK_TSERIES},
    {"TTL",           TK_TTL},
    {"UNION",         TK_UNION},
    {"UNSIGNED",      TK_UNSIGNED},
    {"USE",           TK_USE},
    {"USER",          TK_USER},
    {"USERS",         TK_USERS},
    {"USING",         TK_USING},
    {"VALUE",         TK_VALUE},
    {"VALUES",        TK_VALUES},
    {"VARCHAR",       TK_VARCHAR},
    {"VARIABLES",     TK_VARIABLES},
    {"VERBOSE",       TK_VERBOSE},
    {"VGROUPS",       TK_VGROUPS},
    {"VNODES",        TK_VNODES},
    {"WAL",           TK_WAL},
    {"WATERMARK",     TK_WATERMARK},
    {"WHERE",         TK_WHERE},
    {"WINDOW_CLOSE",  TK_WINDOW_CLOSE},
217
    {"WRITE",         TK_WRITE},
X
Xiaoyu Wang 已提交
218
    {"_C0",           TK_ROWTS},
X
Xiaoyu Wang 已提交
219 220 221 222 223 224
    {"_QENDTS",       TK_QENDTS},
    {"_QSTARTTS",     TK_QSTARTTS},
    {"_ROWTS",        TK_ROWTS},
    {"_WDURATION",    TK_WDURATION},
    {"_WENDTS",       TK_WENDTS},
    {"_WSTARTTS",     TK_WSTARTTS},
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
    // {"ID",           TK_ID},
    // {"STRING",       TK_STRING},
    // {"EQ",           TK_EQ},
    // {"NE",           TK_NE},
    // {"ISNULL",       TK_ISNULL},
    // {"NOTNULL",      TK_NOTNULL},
    // {"GLOB",         TK_GLOB},
    // {"GT",           TK_GT},
    // {"GE",           TK_GE},
    // {"LT",           TK_LT},
    // {"LE",           TK_LE},
    // {"BITAND",       TK_BITAND},
    // {"BITOR",        TK_BITOR},
    // {"LSHIFT",       TK_LSHIFT},
    // {"RSHIFT",       TK_RSHIFT},
    // {"PLUS",         TK_PLUS},
    // {"DIVIDE",       TK_DIVIDE},
    // {"TIMES",        TK_TIMES},
    // {"STAR",         TK_STAR},
    // {"SLASH",        TK_SLASH},
    // {"REM ",         TK_REM},
    // {"||",           TK_CONCAT},
    // {"UMINUS",       TK_UMINUS},
    // {"UPLUS",        TK_UPLUS},
    // {"BITNOT",       TK_BITNOT},
    // {"DOT",          TK_DOT},
    // {"CTIME",        TK_CTIME},
    // {"LP",           TK_LP},
    // {"RP",           TK_RP},
    // {"COMMA",        TK_COMMA},
    // {"EVERY",        TK_EVERY},
    // {"VARIABLE",     TK_VARIABLE},
    // {"UPDATE",       TK_UPDATE},
    // {"CHANGE",       TK_CHANGE},
    // {"COLON",        TK_COLON},
    // {"ABORT",        TK_ABORT},
    // {"AFTER",        TK_AFTER},
    // {"ATTACH",       TK_ATTACH},
    // {"BEFORE",       TK_BEFORE},
    // {"BEGIN",        TK_BEGIN},
    // {"CASCADE",      TK_CASCADE},
    // {"CONFLICT",     TK_CONFLICT},
    // {"COPY",         TK_COPY},
    // {"DEFERRED",     TK_DEFERRED},
    // {"DELIMITERS",   TK_DELIMITERS},
    // {"DETACH",       TK_DETACH},
    // {"EACH",         TK_EACH},
    // {"END",          TK_END},
    // {"FAIL",         TK_FAIL},
    // {"FOR",          TK_FOR},
    // {"IGNORE",       TK_IGNORE},
    // {"IMMEDIATE",    TK_IMMEDIATE},
    // {"INITIALLY",    TK_INITIALLY},
    // {"INSTEAD",      TK_INSTEAD},
    // {"KEY",          TK_KEY},
    // {"OF",           TK_OF},
    // {"RAISE",        TK_RAISE},
    // {"REPLACE",      TK_REPLACE},
    // {"RESTRICT",     TK_RESTRICT},
    // {"ROW",          TK_ROW},
    // {"STATEMENT",    TK_STATEMENT},
    // {"VIEW",         TK_VIEW},
    // {"SEMI",         TK_SEMI},
288
    // {"PARTITIONS",   TK_PARTITIONS},
289
    // {"MODE",         TK_MODE},
H
hzcheng 已提交
290
};
X
Xiaoyu Wang 已提交
291
// clang-format on
H
hzcheng 已提交
292 293 294 295 296 297 298 299 300 301 302 303 304

/*
 * Identifier-character lookup table, indexed by a 7-bit ASCII code.
 * An entry is 1 for '0'-'9', 'A'-'Z', 'a'-'z' and '_', and 0 for everything else.
 * The table has only 128 entries, so a byte with the high bit set (>= 0x80)
 * must be rejected before it is used as an index.
 */
static const char isIdChar[] = {
    /* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */
};

H
Haojun Liao 已提交
305
static void* keywordHashTable = NULL;
H
hzcheng 已提交
306

S
TD-1057  
Shengliang Guan 已提交
307
static void doInitKeywordsTable(void) {
308
  int numOfEntries = tListLen(keywordTable);
X
Xiaoyu Wang 已提交
309

H
Haojun Liao 已提交
310
  keywordHashTable = taosHashInit(numOfEntries, MurmurHash3_32, true, false);
311
  for (int32_t i = 0; i < numOfEntries; i++) {
S
TD-1057  
Shengliang Guan 已提交
312
    keywordTable[i].len = (uint8_t)strlen(keywordTable[i].name);
313
    void* ptr = &keywordTable[i];
H
Haojun Liao 已提交
314
    taosHashPut(keywordHashTable, keywordTable[i].name, keywordTable[i].len, (void*)&ptr, POINTER_BYTES);
H
hzcheng 已提交
315
  }
316 317
}

wafwerar's avatar
wafwerar 已提交
318
static TdThreadOnce keywordsHashTableInit = PTHREAD_ONCE_INIT;
319

320
static int32_t tKeywordCode(const char* z, int n) {
wafwerar's avatar
wafwerar 已提交
321
  taosThreadOnce(&keywordsHashTableInit, doInitKeywordsTable);
X
Xiaoyu Wang 已提交
322

H
hjxilinx 已提交
323
  char key[512] = {0};
X
Xiaoyu Wang 已提交
324
  if (n > tListLen(key)) {  // too long token, can not be any other token type
325
    return TK_NK_ID;
326
  }
X
Xiaoyu Wang 已提交
327

H
hzcheng 已提交
328 329
  for (int32_t j = 0; j < n; ++j) {
    if (z[j] >= 'a' && z[j] <= 'z') {
330
      key[j] = (char)(z[j] & 0xDF);  // to uppercase and set the null-terminated
H
hzcheng 已提交
331 332 333 334 335
    } else {
      key[j] = z[j];
    }
  }

D
dapan1121 已提交
336
  if (keywordHashTable == NULL) {
337
    return TK_NK_ILLEGAL;
D
dapan1121 已提交
338
  }
H
Haojun Liao 已提交
339

H
Haojun Liao 已提交
340
  SKeyword** pKey = (SKeyword**)taosHashGet(keywordHashTable, key, n);
X
Xiaoyu Wang 已提交
341
  return (pKey != NULL) ? (*pKey)->type : TK_NK_ID;
H
hzcheng 已提交
342 343
}

H
huili 已提交
344
/*
345 346 347
 * Return the length of the token that begins at z[0].
 * Store the token type in *type before returning.
 */
348
uint32_t tGetToken(const char* z, uint32_t* tokenId) {
349
  uint32_t i;
H
hzcheng 已提交
350 351 352 353 354 355 356 357
  switch (*z) {
    case ' ':
    case '\t':
    case '\n':
    case '\f':
    case '\r': {
      for (i = 1; isspace(z[i]); i++) {
      }
358
      *tokenId = TK_NK_SPACE;
H
hzcheng 已提交
359 360 361
      return i;
    }
    case ':': {
362
      *tokenId = TK_NK_COLON;
H
hzcheng 已提交
363 364 365 366 367 368
      return 1;
    }
    case '-': {
      if (z[1] == '-') {
        for (i = 2; z[i] && z[i] != '\n'; i++) {
        }
369
        *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
370
        return i;
371 372 373
      } else if (z[1] == '>') {
        *tokenId = TK_NK_ARROW;
        return 2;
H
hzcheng 已提交
374
      }
X
Xiaoyu Wang 已提交
375
      *tokenId = TK_NK_MINUS;
H
hzcheng 已提交
376 377 378
      return 1;
    }
    case '(': {
379
      *tokenId = TK_NK_LP;
H
hzcheng 已提交
380 381 382
      return 1;
    }
    case ')': {
383
      *tokenId = TK_NK_RP;
H
hzcheng 已提交
384 385 386
      return 1;
    }
    case ';': {
387
      *tokenId = TK_NK_SEMI;
H
hzcheng 已提交
388 389 390
      return 1;
    }
    case '+': {
391
      *tokenId = TK_NK_PLUS;
H
hzcheng 已提交
392 393 394
      return 1;
    }
    case '*': {
395
      *tokenId = TK_NK_STAR;
H
hzcheng 已提交
396 397 398 399
      return 1;
    }
    case '/': {
      if (z[1] != '*' || z[2] == 0) {
400
        *tokenId = TK_NK_SLASH;
H
hzcheng 已提交
401 402 403 404 405
        return 1;
      }
      for (i = 3; z[i] && (z[i] != '/' || z[i - 1] != '*'); i++) {
      }
      if (z[i]) i++;
406
      *tokenId = TK_NK_COMMENT;
H
hzcheng 已提交
407 408 409
      return i;
    }
    case '%': {
410
      *tokenId = TK_NK_REM;
H
hzcheng 已提交
411 412 413
      return 1;
    }
    case '=': {
414
      *tokenId = TK_NK_EQ;
H
hzcheng 已提交
415 416 417 418
      return 1 + (z[1] == '=');
    }
    case '<': {
      if (z[1] == '=') {
419
        *tokenId = TK_NK_LE;
H
hzcheng 已提交
420 421
        return 2;
      } else if (z[1] == '>') {
422
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
423 424
        return 2;
      } else if (z[1] == '<') {
425
        *tokenId = TK_NK_LSHIFT;
H
hzcheng 已提交
426 427
        return 2;
      } else {
428
        *tokenId = TK_NK_LT;
H
hzcheng 已提交
429 430 431 432 433
        return 1;
      }
    }
    case '>': {
      if (z[1] == '=') {
434
        *tokenId = TK_NK_GE;
H
hzcheng 已提交
435 436
        return 2;
      } else if (z[1] == '>') {
437
        *tokenId = TK_NK_RSHIFT;
H
hzcheng 已提交
438 439
        return 2;
      } else {
440
        *tokenId = TK_NK_GT;
H
hzcheng 已提交
441 442 443 444 445
        return 1;
      }
    }
    case '!': {
      if (z[1] != '=') {
446
        *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
447 448
        return 2;
      } else {
449
        *tokenId = TK_NK_NE;
H
hzcheng 已提交
450 451 452 453 454
        return 2;
      }
    }
    case '|': {
      if (z[1] != '|') {
455
        *tokenId = TK_NK_BITOR;
H
hzcheng 已提交
456 457
        return 1;
      } else {
458
        *tokenId = TK_NK_CONCAT;
H
hzcheng 已提交
459 460 461 462
        return 2;
      }
    }
    case ',': {
463
      *tokenId = TK_NK_COMMA;
H
hzcheng 已提交
464 465 466
      return 1;
    }
    case '&': {
467
      *tokenId = TK_NK_BITAND;
H
hzcheng 已提交
468 469 470
      return 1;
    }
    case '~': {
471
      *tokenId = TK_NK_BITNOT;
H
hzcheng 已提交
472 473
      return 1;
    }
S
slguan 已提交
474
    case '?': {
475
      *tokenId = TK_NK_QUESTION;
S
slguan 已提交
476 477
      return 1;
    }
478
    case '`':
H
hzcheng 已提交
479 480
    case '\'':
    case '"': {
S
slguan 已提交
481 482
      int  delim = z[0];
      bool strEnd = false;
H
hzcheng 已提交
483
      for (i = 1; z[i]; i++) {
484
        if (delim != '`' && z[i] == '\\') {  // ignore the escaped character that follows this backslash
L
[1292]  
lihui 已提交
485 486 487
          i++;
          continue;
        }
X
Xiaoyu Wang 已提交
488

489
        if (z[i] == delim) {
H
hzcheng 已提交
490 491 492
          if (z[i + 1] == delim) {
            i++;
          } else {
H
huili 已提交
493
            strEnd = true;
H
hzcheng 已提交
494 495 496 497
            break;
          }
        }
      }
X
Xiaoyu Wang 已提交
498

H
hzcheng 已提交
499
      if (z[i]) i++;
H
huili 已提交
500

S
slguan 已提交
501
      if (strEnd) {
X
Xiaoyu Wang 已提交
502
        *tokenId = (delim == '`') ? TK_NK_ID : TK_NK_STRING;
S
slguan 已提交
503 504
        return i;
      }
H
huili 已提交
505

S
slguan 已提交
506
      break;
H
hzcheng 已提交
507 508
    }
    case '.': {
S
slguan 已提交
509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
      /*
       * handle the the float number with out integer part
       * .123
       * .123e4
       */
      if (isdigit(z[1])) {
        for (i = 2; isdigit(z[i]); i++) {
        }

        if ((z[i] == 'e' || z[i] == 'E') &&
            (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
          i += 2;
          while (isdigit(z[i])) {
            i++;
          }
        }

526
        *tokenId = TK_NK_FLOAT;
S
slguan 已提交
527 528
        return i;
      } else {
529
        *tokenId = TK_NK_DOT;
S
slguan 已提交
530 531 532 533 534 535 536
        return 1;
      }
    }

    case '0': {
      char next = z[1];

X
Xiaoyu Wang 已提交
537
      if (next == 'b') {  // bin number
538
        *tokenId = TK_NK_BIN;
S
slguan 已提交
539 540 541 542 543 544 545 546
        for (i = 2; (z[i] == '0' || z[i] == '1'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
X
Xiaoyu Wang 已提交
547
      } else if (next == 'x') {  // hex number
548
        *tokenId = TK_NK_HEX;
S
slguan 已提交
549 550 551 552 553 554 555 556 557
        for (i = 2; isdigit(z[i]) || (z[i] >= 'a' && z[i] <= 'f') || (z[i] >= 'A' && z[i] <= 'F'); ++i) {
        }

        if (i == 2) {
          break;
        }

        return i;
      }
H
hzcheng 已提交
558 559 560 561 562 563 564 565 566 567
    }
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9': {
568
      *tokenId = TK_NK_INTEGER;
H
hzcheng 已提交
569 570 571
      for (i = 1; isdigit(z[i]); i++) {
      }

H
Haojun Liao 已提交
572
      /* here is the 1u/1a/2s/3m/9y */
X
Xiaoyu Wang 已提交
573 574 575
      if ((z[i] == 'b' || z[i] == 'u' || z[i] == 'a' || z[i] == 's' || z[i] == 'm' || z[i] == 'h' || z[i] == 'd' ||
           z[i] == 'n' || z[i] == 'y' || z[i] == 'w' || z[i] == 'B' || z[i] == 'U' || z[i] == 'A' || z[i] == 'S' ||
           z[i] == 'M' || z[i] == 'H' || z[i] == 'D' || z[i] == 'N' || z[i] == 'Y' || z[i] == 'W') &&
H
hjxilinx 已提交
576
          (isIdChar[(uint8_t)z[i + 1]] == 0)) {
577
        *tokenId = TK_NK_VARIABLE;
H
hzcheng 已提交
578 579 580 581 582 583 584 585 586 587
        i += 1;
        return i;
      }

      int32_t seg = 1;
      while (z[i] == '.' && isdigit(z[i + 1])) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
588
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
589 590 591 592
        seg++;
      }

      if (seg == 4) {  // ip address
593
        *tokenId = TK_NK_IPTOKEN;
H
hzcheng 已提交
594
        return i;
X
Xiaoyu Wang 已提交
595 596
      } else if (seg > 2) {
        break;
H
hzcheng 已提交
597 598 599 600 601 602 603 604
      }

      if ((z[i] == 'e' || z[i] == 'E') &&
          (isdigit(z[i + 1]) || ((z[i + 1] == '+' || z[i + 1] == '-') && isdigit(z[i + 2])))) {
        i += 2;
        while (isdigit(z[i])) {
          i++;
        }
605
        *tokenId = TK_NK_FLOAT;
H
hzcheng 已提交
606 607 608
      }
      return i;
    }
X
Xiaoyu Wang 已提交
609 610 611 612 613 614
    // case '[': {
    //   for (i = 1; z[i] && z[i - 1] != ']'; i++) {
    //   }
    //   *tokenId = TK_NK_ID;
    //   return i;
    // }
H
hzcheng 已提交
615 616 617 618
    case 'T':
    case 't':
    case 'F':
    case 'f': {
X
Xiaoyu Wang 已提交
619
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
620 621 622
      }

      if ((i == 4 && strncasecmp(z, "true", 4) == 0) || (i == 5 && strncasecmp(z, "false", 5) == 0)) {
623
        *tokenId = TK_NK_BOOL;
H
hzcheng 已提交
624 625 626 627
        return i;
      }
    }
    default: {
X
Xiaoyu Wang 已提交
628
      if (((*z & 0x80) != 0) || !isIdChar[(uint8_t)*z]) {
H
hzcheng 已提交
629 630
        break;
      }
X
Xiaoyu Wang 已提交
631
      for (i = 1; ((z[i] & 0x80) == 0) && isIdChar[(uint8_t)z[i]]; i++) {
H
hzcheng 已提交
632
      }
633
      *tokenId = tKeywordCode(z, i);
H
hzcheng 已提交
634 635 636 637
      return i;
    }
  }

638
  *tokenId = TK_NK_ILLEGAL;
H
hzcheng 已提交
639 640 641
  return 0;
}

X
Xiaoyu Wang 已提交
642 643 644
SToken tscReplaceStrToken(char** str, SToken* token, const char* newToken) {
  char*   src = *str;
  size_t  nsize = strlen(newToken);
D
dapan1121 已提交
645 646
  int32_t size = (int32_t)strlen(*str) - token->n + (int32_t)nsize + 1;
  int32_t bsize = (int32_t)((uint64_t)token->z - (uint64_t)src);
X
Xiaoyu Wang 已提交
647
  SToken  ntoken;
D
dapan1121 已提交
648

wafwerar's avatar
wafwerar 已提交
649
  *str = taosMemoryCalloc(1, size);
D
dapan1121 已提交
650 651

  strncpy(*str, src, bsize);
H
Haojun Liao 已提交
652
  strcat(*str, newToken);
D
dapan1121 已提交
653 654
  strcat(*str, token->z + token->n);

D
dapan1121 已提交
655
  ntoken.n = (uint32_t)nsize;
D
dapan1121 已提交
656 657
  ntoken.z = *str + bsize;

wafwerar's avatar
wafwerar 已提交
658
  taosMemoryFreeClear(src);
D
dapan1121 已提交
659 660 661 662

  return ntoken;
}

663
SToken tStrGetToken(const char* str, int32_t* i, bool isPrevOptr) {
H
Haojun Liao 已提交
664
  SToken t0 = {0};
S
slguan 已提交
665

H
hzcheng 已提交
666 667
  // here we reach the end of sql string, null-terminated string
  if (str[*i] == 0) {
S
slguan 已提交
668 669
    t0.n = 0;
    return t0;
H
hzcheng 已提交
670 671
  }

672
  // IGNORE TK_NK_SPACE, TK_NK_COMMA, and specified tokens
S
slguan 已提交
673 674 675
  while (1) {
    *i += t0.n;

H
Haojun Liao 已提交
676
    int32_t numOfComma = 0;
X
Xiaoyu Wang 已提交
677
    char    t = str[*i];
H
Haojun Liao 已提交
678 679 680 681
    while (t == ' ' || t == '\n' || t == '\r' || t == '\t' || t == '\f' || t == ',') {
      if (t == ',' && (++numOfComma > 1)) {  // comma only allowed once
        t0.n = 0;
        return t0;
S
slguan 已提交
682
      }
X
Xiaoyu Wang 已提交
683

H
Haojun Liao 已提交
684
      t = str[++(*i)];
S
slguan 已提交
685
    }
H
hzcheng 已提交
686

687
    t0.n = tGetToken(&str[*i], &t0.type);
H
Haojun Liao 已提交
688
    break;
S
slguan 已提交
689

H
Haojun Liao 已提交
690 691
    // not support user specfied ignored symbol list
#if 0
H
Haojun Liao 已提交
692
    bool ignore = false;
S
slguan 已提交
693 694
    for (uint32_t k = 0; k < numOfIgnoreToken; k++) {
      if (t0.type == ignoreTokenTypes[k]) {
H
Haojun Liao 已提交
695
        ignore = true;
S
slguan 已提交
696 697 698 699
        break;
      }
    }

H
Haojun Liao 已提交
700
    if (!ignore) {
S
slguan 已提交
701 702
      break;
    }
H
Haojun Liao 已提交
703
#endif
H
hzcheng 已提交
704 705
  }

706
  if (t0.type == TK_NK_SEMI) {
S
slguan 已提交
707
    t0.n = 0;
D
dapan1121 已提交
708
    t0.type = 0;
S
slguan 已提交
709 710 711 712 713 714 715 716
    return t0;
  }

  uint32_t type = 0;
  int32_t  len;

  // support parse the 'db.tbl' format, notes: There should be no space on either side of the dot!
  if ('.' == str[*i + t0.n]) {
717
    len = tGetToken(&str[*i + t0.n + 1], &type);
S
slguan 已提交
718 719

    // only id and string are valid
720
    if ((TK_NK_STRING != t0.type) && (TK_NK_ID != t0.type)) {
721
      t0.type = TK_NK_ILLEGAL;
S
slguan 已提交
722 723 724 725 726 727 728 729 730
      t0.n = 0;

      return t0;
    }

    t0.n += len + 1;

  } else {
    // support parse the -/+number format
X
Xiaoyu Wang 已提交
731
    if ((isPrevOptr) && (t0.type == TK_NK_MINUS || t0.type == TK_NK_PLUS)) {
732
      len = tGetToken(&str[*i + t0.n], &type);
733
      if (type == TK_NK_INTEGER || type == TK_NK_FLOAT) {
S
slguan 已提交
734 735 736
        t0.type = type;
        t0.n += len;
      }
H
hzcheng 已提交
737 738 739
    }
  }

X
Xiaoyu Wang 已提交
740
  t0.z = (char*)str + (*i);
S
slguan 已提交
741 742 743
  *i += t0.n;

  return t0;
H
hzcheng 已提交
744 745
}

X
Xiaoyu Wang 已提交
746
bool taosIsKeyWordToken(const char* z, int32_t len) { return (tKeywordCode((char*)z, len) != TK_NK_ID); }
H
Haojun Liao 已提交
747 748

/*
 * Release the keyword hash table.
 *
 * The global pointer is swapped to NULL with a compare-and-exchange, and only
 * the caller whose exchange succeeds destroys the table. Nothing in this file
 * rebuilds the table afterwards: tKeywordCode() reports TK_NK_ILLEGAL while
 * keywordHashTable is NULL.
 */
void taosCleanupKeywordsTable(void) {
  void* m = keywordHashTable;
  if (m == NULL) {
    return;
  }

  if (atomic_val_compare_exchange_ptr(&keywordHashTable, m, 0) != m) {
    return;  // somebody else took the table first
  }

  taosHashCleanup(m);
}

H
Haojun Liao 已提交
755
SToken taosTokenDup(SToken* pToken, char* buf, int32_t len) {
756
  assert(pToken != NULL && buf != NULL && len > pToken->n);
X
Xiaoyu Wang 已提交
757

758 759 760
  strncpy(buf, pToken->z, pToken->n);
  buf[pToken->n] = 0;

H
Haojun Liao 已提交
761
  SToken token = *pToken;
762 763 764
  token.z = buf;
  return token;
}